{"id":15863,"date":"2024-01-18T01:11:16","date_gmt":"2024-01-17T21:41:16","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/"},"modified":"2024-01-18T01:11:16","modified_gmt":"2024-01-17T21:41:16","slug":"%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/","title":{"rendered":"\u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%ad%d8%b0%d9%81_stop_words_%d8%a8%d8%a7_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\" >\u062d\u0630\u0641 Stop Words \u0628\u0627 \u067e\u0627\u06cc\u062a\u0648\u0646<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-2\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_nltk_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\" >\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 NLTK \u067e\u0627\u06cc\u062a\u0648\u0646<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%db%8c%d8%a7_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d8%af%d8%b1_%d9%81%d9%87%d8%b1%d8%b3%d8%aa_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d9%be%db%8c%d8%b4%e2%80%8c%d9%81%d8%b1%d8%b6_nltk\" >\u0627\u0641\u0632\u0648\u062f\u0646 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 NLTK<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_gensim_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\" >\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 
\u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%d9%88_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d8%af%d8%b1_%d9%81%d9%87%d8%b1%d8%b3%d8%aa_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d9%be%db%8c%d8%b4%e2%80%8c%d9%81%d8%b1%d8%b6_gensim\" >\u0627\u0641\u0632\u0648\u062f\u0646 \u0648 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 Gensim<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_spacy\" >\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 SpaCy<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%d9%88_%d8%ad%d8%b0%d9%81_stop_words_%d8%af%d8%b1_spacy_default_stop_word_list\" >\u0627\u0641\u0632\u0648\u062f\u0646 
\u0648 \u062d\u0630\u0641 Stop Words \u062f\u0631 SpaCy Default Stop Word List<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d8%a7%d8%b3%da%a9%d8%b1%db%8c%d9%be%d8%aa_%d8%b3%d9%81%d8%a7%d8%b1%d8%b4%db%8c_%d8%a8%d8%b1%d8%a7%db%8c_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81\" >\u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0633\u0641\u0627\u0631\u0634\u06cc \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%ad%d8%b0%d9%81-%da%a9%d9%84%d9%85%d8%a7%d8%aa-%d8%aa%d9%88%d9%82%d9%81-%d8%a7%d8%b2-%d8%b1%d8%b4%d8%aa%d9%87-%d9%87%d8%a7-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 13<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0642\u0635\u062f \u062f\u0627\u0631\u06cc\u062f \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a 
\u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f.  \u06a9\u0644\u0645\u0627\u062a Stop \u0622\u0646 \u062f\u0633\u062a\u0647 \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062f\u0631 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0647\u0633\u062a\u0646\u062f \u06a9\u0647 \u0645\u0639\u0646\u06cc \u0628\u0633\u06cc\u0627\u0631 \u06a9\u0645\u06cc \u062f\u0627\u0631\u0646\u062f\u060c \u0645\u0627\u0646\u0646\u062f &#8220;is&#8221;\u060c &#8220;an&#8221;\u060c &#8220;the&#8221;\u060c \u0648 \u063a\u06cc\u0631\u0647. \u0645\u0648\u062a\u0648\u0631\u0647\u0627\u06cc \u062c\u0633\u062a\u062c\u0648 \u0648 \u0633\u0627\u06cc\u0631 \u067e\u0644\u062a\u0641\u0631\u0645\u200c\u0647\u0627\u06cc \u0646\u0645\u0627\u06cc\u0647\u200c\u0633\u0627\u0632\u06cc \u0633\u0627\u0632\u0645\u0627\u0646\u06cc \u0627\u063a\u0644\u0628 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0641\u06cc\u0644\u062a\u0631 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0646\u062a\u0627\u06cc\u062c \u0631\u0627 \u0627\u0632 \u067e\u0627\u06cc\u06af\u0627\u0647 \u062f\u0627\u062f\u0647 \u062f\u0631 \u0645\u0642\u0627\u0628\u0644 \u062f\u0631\u06cc\u0627\u0641\u062a \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f. 
\u06a9\u0627\u0631\u0628\u0631 \u067e\u0631\u0633 \u0648 \u062c\u0648 \u0645\u06cc \u06a9\u0646\u062f<\/p>\n<p>\u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u063a\u0644\u0628 \u0642\u0628\u0644 \u0627\u0632 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0648 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0627\u0632 \u0645\u062a\u0646 \u062d\u0630\u0641 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f\u060c \u0632\u06cc\u0631\u0627 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0628\u0647 \u0648\u0641\u0648\u0631 \u0627\u062a\u0641\u0627\u0642 \u0645\u06cc\u200c\u0627\u0641\u062a\u0646\u062f\u060c \u0627\u0632 \u0627\u06cc\u0646 \u0631\u0648 \u0627\u0637\u0644\u0627\u0639\u0627\u062a \u0645\u0646\u062d\u0635\u0631 \u0628\u0647 \u0641\u0631\u062f \u06a9\u0645\u06cc \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u062f \u0628\u0631\u0627\u06cc \u0637\u0628\u0642\u0647\u200c\u0628\u0646\u062f\u06cc \u06cc\u0627 \u062e\u0648\u0634\u0647\u200c\u0628\u0646\u062f\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0634\u0648\u062f.<\/p>\n<h2 id=\"removingstopwordswithpython\"><span class=\"ez-toc-section\" id=\"%d8%ad%d8%b0%d9%81_stop_words_%d8%a8%d8%a7_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\"><\/span>\u062d\u0630\u0641 Stop Words \u0628\u0627 \u067e\u0627\u06cc\u062a\u0648\u0646<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u0627 \u0632\u0628\u0627\u0646 \u0628\u0631\u0646\u0627\u0645\u0647 \u0646\u0648\u06cc\u0633\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646\u060c \u06af\u0632\u06cc\u0646\u0647 \u0647\u0627\u06cc \u0628\u06cc \u0634\u0645\u0627\u0631\u06cc \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u062f\u0627\u0631\u06cc\u062f.  
\u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u06cc\u06a9\u06cc \u0627\u0632 \u0686\u0646\u062f\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0645\u0627\u0646\u0646\u062f NLTK\u060c SpaCy\u060c Gensim\u060c TextBlob \u0648 \u063a\u06cc\u0631\u0647 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f \u06cc\u0627 \u0627\u06af\u0631 \u0628\u0647 \u06a9\u0646\u062a\u0631\u0644 \u06a9\u0627\u0645\u0644 \u0646\u06cc\u0627\u0632 \u062f\u0627\u0631\u06cc\u062f. \u0631\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641\u06cc \u06a9\u0647 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0633\u0641\u0627\u0631\u0634\u06cc \u062e\u0648\u062f \u0631\u0627 \u0628\u0646\u0648\u06cc\u0633\u06cc\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0634\u0645\u0627 \u0628\u0633\u062a\u0647 \u0628\u0647 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0645\u062a\u0641\u0627\u0648\u062a\u06cc \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f \u0631\u0648\u06cc \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 NLP \u06a9\u0647 \u0627\u0632 \u0622\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<h3 id=\"usingpythonsnltklibrary\"><span class=\"ez-toc-section\" id=\"%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_nltk_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\"><\/span>\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 NLTK \u067e\u0627\u06cc\u062a\u0648\u0646<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0631\u0627 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/www.nltk.org\/\">NLTK<\/a> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u06cc\u06a9\u06cc \u0627\u0632 \u0642\u062f\u06cc\u0645\u06cc \u062a\u0631\u06cc\u0646 \u0648 \u0631\u0627\u06cc\u062c \u062a\u0631\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0627\u0633\u062a.  NLTK \u0627\u0632 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u067e\u0634\u062a\u06cc\u0628\u0627\u0646\u06cc \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u062f\u0631 \u0622\u0646 \u067e\u06cc\u062f\u0627 \u06a9\u0646\u06cc\u062f <code>corpus<\/code> \u0645\u062f\u0648\u0644.  
\u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u06cc\u06a9 \u062c\u0645\u0644\u0647\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0645\u062a\u0646 \u062e\u0648\u062f \u0631\u0627 \u0628\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0642\u0633\u06cc\u0645 \u06a9\u0646\u06cc\u062f \u0648 \u0633\u067e\u0633 \u062f\u0631 \u0635\u0648\u0631\u062a \u0648\u062c\u0648\u062f \u062f\u0631 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0631\u0627\u0626\u0647 \u0634\u062f\u0647 \u062a\u0648\u0633\u0637 NLTK\u060c \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u06cc\u06a9 \u0645\u062b\u0627\u0644 \u0633\u0627\u062f\u0647 \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> nltk\n<span class=\"hljs-keyword\">from<\/span> nltk.corpus <span class=\"hljs-keyword\">import<\/span> stopwords\nnltk.download(<span class=\"hljs-string\">'stopwords'<\/span>)\n<span class=\"hljs-keyword\">from<\/span> nltk.tokenize <span class=\"hljs-keyword\">import<\/span> word_tokenize\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\n\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> stopwords.words()]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u0627 \u0627\u0628\u062a\u062f\u0627 import \u0631\u0627 <code>stopwords<\/code> \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u0632 <code>nltk.corpus<\/code> \u0645\u062f\u0648\u0644.  
\u0628\u0639\u062f\u060c \u0645\u0627 import \u0631\u0627 <code>word_tokenize()<\/code> \u0631\u0648\u0634 \u0627\u0632 <code>nltk.tokenize<\/code> \u06a9\u0644\u0627\u0633  \u0633\u067e\u0633 \u06cc\u06a9 \u0645\u062a\u063a\u06cc\u0631 \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>text<\/code>\u060c \u06a9\u0647 \u0634\u0627\u0645\u0644 \u06cc\u06a9 \u062c\u0645\u0644\u0647 \u0633\u0627\u062f\u0647 \u0627\u0633\u062a.  \u062c\u0645\u0644\u0647 \u062f\u0631 <code>text<\/code> \u0645\u062a\u063a\u06cc\u0631 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0639\u0644\u0627\u0645\u062a \u0646\u0634\u0627\u0646\u0647 \u06af\u0630\u0627\u0631\u06cc \u0645\u06cc \u0634\u0648\u062f (\u0628\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0642\u0633\u06cc\u0645 \u0645\u06cc \u0634\u0648\u062f). <code>word_tokenize()<\/code> \u0631\u0648\u0634.  \u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u0622\u0646 \u0631\u0627 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>text_tokens<\/code> \u0644\u06cc\u0633\u062a \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0628\u0631\u0631\u0633\u06cc \u0645\u06cc \u06a9\u0646\u062f \u06a9\u0647 \u0622\u06cc\u0627 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f \u06cc\u0627 \u062e\u06cc\u0631.  
\u0627\u06af\u0631 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u062a\u0648\u0642\u0641 \u06a9\u0644\u0645\u0647 \u0648\u062c\u0648\u062f \u0646\u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f\u060c \u0628\u0631\u06af\u0631\u062f\u0627\u0646\u062f\u0647 \u0634\u062f\u0647 \u0648 \u0628\u0647 \u0622\u0646 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u0634\u0648\u062f <code>tokens_without_sw<\/code> \u0641\u0647\u0631\u0633\u062a  \u0631\u0627 <code>tokens_without_sw<\/code> \u0633\u067e\u0633 \u0644\u06cc\u0633\u062a \u0686\u0627\u067e \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0686\u06af\u0648\u0646\u0647 \u062c\u0645\u0644\u0647 \u0628\u062f\u0648\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0628\u0647 \u0646\u0638\u0631 \u0645\u06cc \u0631\u0633\u062f:<\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'however'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a <code>to<\/code>\u060c <code>he<\/code>\u060c <code>is<\/code>\u060c <code>not<\/code>\u060c \u0648 <code>too<\/code> \u0627\u0632 \u062c\u0645\u0644\u0647 \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0646\u062f.<\/p>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0647 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u0628\u0627\u0644\u0627 \u0628\u067e\u06cc\u0648\u0646\u062f\u06cc\u062f \u062a\u0627 \u06cc\u06a9 \u062c\u0645\u0644\u0647 \u0628\u062f\u0648\u0646 
\u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u062f\u0631 \u0632\u06cc\u0631 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">filtered_sentence = (<span class=\"hljs-string\">\" \"<\/span>).join(tokens_without_sw)\n<span class=\"hljs-built_in\">print<\/span>(filtered_sentence)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">Nick likes play football , however fond tennis .\n<\/code><\/pre>\n<h3 id=\"addingorremovingstopwordsinnltksdefaultstopwordlist\"><span class=\"ez-toc-section\" id=\"%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%db%8c%d8%a7_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d8%af%d8%b1_%d9%81%d9%87%d8%b1%d8%b3%d8%aa_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d9%be%db%8c%d8%b4%e2%80%8c%d9%81%d8%b1%d8%b6_nltk\"><\/span>\u0627\u0641\u0632\u0648\u062f\u0646 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 NLTK<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0637\u0628\u0642 \u0627\u0646\u062a\u062e\u0627\u0628 \u062e\u0648\u062f \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u0648\u062c\u0648\u062f \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 NLTK \u0627\u0636\u0627\u0641\u0647 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  
\u0642\u0628\u0644 \u0627\u0632 \u062d\u0630\u0641 \u06cc\u0627 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 NLTK\u060c \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0644\u06cc\u0633\u062a \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u067e\u0634\u062a\u06cc\u0628\u0627\u0646\u06cc \u0634\u062f\u0647 \u062a\u0648\u0633\u0637 NLTK \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(stopwords.words(<span class=\"hljs-string\">'english'<\/span>))\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">('i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 
'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\")\n<\/code><\/pre>\n<h4 id=\"addingstopwordstodefaultnltkstopwordlist\">\u0627\u0641\u0632\u0648\u062f\u0646 Stop Words \u0628\u0647 \u0641\u0647\u0631\u0633\u062a \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 NLTK Stop Word<\/h4>\n<p>\u0628\u0631\u0627\u06cc \u0627\u0641\u0632\u0648\u062f\u0646 \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 NLTK\u060c \u0627\u0628\u062a\u062f\u0627 \u06cc\u06a9 \u0634\u06cc \u0627\u0632 \u0642\u0633\u0645\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f <code>stopwords.words('english')<\/code> \u0641\u0647\u0631\u0633\u062a  \u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0627\u0632 <code>append()<\/code> \u0631\u0648\u0634 \u0631\u0648\u06cc \u0644\u06cc\u0633\u062a\u06cc \u0628\u0631\u0627\u06cc \u0627\u0641\u0632\u0648\u062f\u0646 \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0627\u06cc \u0628\u0647 \u0644\u06cc\u0633\u062a<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u062f <code>play<\/code> \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 NLTK.  
\u0628\u0627\u0632 \u0647\u0645 \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0627\u0632 \u062e\u0648\u062f \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>text<\/code> \u0645\u062a\u063a\u06cc\u0631 \u0628\u0631\u0627\u06cc \u062f\u06cc\u062f\u0646 \u0627\u06cc\u0646\u06a9\u0647 \u0622\u06cc\u0627 \u06a9\u0644\u0645\u0647 <code>play<\/code> \u062d\u0630\u0641 \u0645\u06cc \u0634\u0648\u062f \u06cc\u0627 \u062e\u06cc\u0631<\/p>\n<pre><code class=\"hljs\">all_stopwords = stopwords.words(<span class=\"hljs-string\">'english'<\/span>)\nall_stopwords.append(<span class=\"hljs-string\">'play'<\/span>)\n\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'however'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0647 <code>play<\/code> \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0622\u0646 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f <code>stopwords.words<\/code> 
\u0644\u06cc\u0633\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <code>extend<\/code> \u0631\u0648\u0634\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u062f\u0631 \u0632\u06cc\u0631 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">sw_list = [<span class=\"hljs-string\">'likes'<\/span>,<span class=\"hljs-string\">'play'<\/span>]\nall_stopwords.extend(sw_list)\n\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u062f\u0648 \u06a9\u0644\u0645\u0647 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u062f <code>likes<\/code> \u0648 <code>play<\/code> \u0628\u0647 <code>stopwords.words<\/code> \u0641\u0647\u0631\u0633\u062a  \u062f\u0631 \u062e\u0631\u0648\u062c\u06cc\u060c \u0627\u06cc\u0646 \u062f\u0648 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0645\u0627\u0646\u0646\u062f \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0645\u0634\u0627\u0647\u062f\u0647 \u0646\u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f:<\/p>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'however'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<h4 id=\"removingstopwordsfromdefaultnltkstopwordlist\">\u062d\u0630\u0641 Stop Words \u0627\u0632 \u0641\u0647\u0631\u0633\u062a 
\u067e\u06cc\u0634\u200c\u0641\u0631\u0636 NLTK Stop Word<\/h4>\n<p>\u0627\u0632 \u0622\u0646\u062c\u0627 \u06a9\u0647 <code>stopwords.words('english')<\/code> \u0641\u0642\u0637 \u06cc\u06a9 \u0644\u06cc\u0633\u062a \u0627\u0632 \u0645\u0648\u0627\u0631\u062f \u0627\u0633\u062a\u060c \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0645\u0648\u0627\u0631\u062f \u0631\u0627 \u0645\u0627\u0646\u0646\u062f \u0647\u0631 \u0644\u06cc\u0633\u062a \u062f\u06cc\u06af\u0631\u06cc \u0627\u0632 \u0627\u06cc\u0646 \u0644\u06cc\u0633\u062a \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  \u0633\u0627\u062f\u0647 \u062a\u0631\u06cc\u0646 \u0631\u0627\u0647 \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0627\u0632 \u0637\u0631\u06cc\u0642 <code>remove()<\/code> \u0631\u0648\u0634.  \u0627\u06cc\u0646 \u0628\u0631\u0627\u06cc \u0632\u0645\u0627\u0646\u06cc \u0645\u0641\u06cc\u062f \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0646\u0627\u0645\u0647 \u0634\u0645\u0627 \u0646\u06cc\u0627\u0632 \u0628\u0647 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u062f\u0627\u0631\u062f \u062a\u0627 \u062d\u0630\u0641 \u0646\u0634\u0648\u062f.  
\u0628\u0631\u0627\u06cc \u0645\u062b\u0627\u0644\u060c \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0644\u0627\u0632\u0645 \u0628\u0627\u0634\u062f \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062d\u0641\u0638 \u06a9\u0646\u06cc\u062f <code>not<\/code> \u062f\u0631 \u06cc\u06a9 \u062c\u0645\u0644\u0647 \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0633\u062a\u0646 \u0627\u06cc\u0646\u06a9\u0647 \u0686\u0647 \u0632\u0645\u0627\u0646\u06cc \u06cc\u06a9 \u06af\u0632\u0627\u0631\u0647 \u0646\u0641\u06cc \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u062f <code>not<\/code> \u0627\u0632 \u0644\u06cc\u0633\u062a \u067e\u06cc\u0634 \u0641\u0631\u0636 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 NLTK:<\/p>\n<pre><code class=\"hljs\">all_stopwords = stopwords.words(<span class=\"hljs-string\">'english'<\/span>)\nall_stopwords.remove(<span class=\"hljs-string\">'not'<\/span>)\n\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'however'<\/span>, <span class=\"hljs-string\">'not'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span 
class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u0627\u0632 \u062e\u0631\u0648\u062c\u06cc\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f <code>not<\/code> \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0646\u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<h3 id=\"usingpythonsgensimlibrary\"><span class=\"ez-toc-section\" id=\"%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_gensim_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\"><\/span>\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u06cc\u06a9 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0628\u0633\u06cc\u0627\u0631 \u0645\u0641\u06cc\u062f \u062f\u06cc\u06af\u0631 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u06cc\u06a9 \u0631\u0634\u062a\u0647 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a.  \u062a\u0646\u0647\u0627 \u06a9\u0627\u0631\u06cc \u06a9\u0647 \u0628\u0627\u06cc\u062f \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 import \u0631\u0627 <code>remove_stopwords()<\/code> \u0631\u0648\u0634 \u0627\u0632 <code>gensim.parsing.preprocessing<\/code> \u0645\u062f\u0648\u0644.  
\u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0628\u0627\u06cc\u062f \u062c\u0645\u0644\u0647 \u062e\u0648\u062f \u0631\u0627 \u06a9\u0647 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0627\u0632 \u0622\u0646 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f\u060c \u0628\u0647 \u0622\u0646 \u0645\u0646\u062a\u0642\u0644 \u06a9\u0646\u06cc\u062f <code>remove_stopwords()<\/code> \u0631\u0648\u0634\u06cc \u06a9\u0647 \u0631\u0634\u062a\u0647 \u0645\u062a\u0646\u06cc \u0631\u0627 \u0628\u062f\u0648\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0628\u0627\u0632 \u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.<\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u06cc\u06a9 \u0645\u062b\u0627\u0644 \u0633\u0627\u062f\u0647 \u0627\u0632 \u0631\u0648\u0634 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u06cc\u0627\u0646\u062f\u0627\u0632\u06cc\u0645.<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.parsing.preprocessing <span class=\"hljs-keyword\">import<\/span> remove_stopwords\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\nfiltered_sentence = remove_stopwords(text)\n\n<span class=\"hljs-built_in\">print<\/span>(filtered_sentence)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">Nick likes play football, fond tennis.\n<\/code><\/pre>\n<p>\u0630\u06a9\u0631 \u0627\u06cc\u0646 \u0646\u06a9\u062a\u0647 \u0636\u0631\u0648\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 \u062e\u0631\u0648\u062c\u06cc \u067e\u0633 \u0627\u0632 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0628\u0627 
\u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc NLTK \u0648 Gensim \u0645\u062a\u0641\u0627\u0648\u062a \u0627\u0633\u062a.  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062f\u0631 \u0646\u0638\u0631 \u06af\u0631\u0641\u062a <code>however<\/code> \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u0628\u0627\u0634\u062f \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 NLTK \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0646\u06a9\u0631\u062f\u0647 \u0627\u0633\u062a \u0648 \u0627\u0632 \u0627\u06cc\u0646 \u0631\u0648 \u0622\u0646 \u0631\u0627 \u062d\u0630\u0641 \u0646\u06a9\u0631\u062f\u0647 \u0627\u0633\u062a.  \u0627\u06cc\u0646 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0647\u06cc\u0686 \u0642\u0627\u0639\u062f\u0647 \u0633\u062e\u062a \u0648 \u0633\u0631\u06cc\u0639\u06cc \u0648\u062c\u0648\u062f \u0646\u062f\u0627\u0631\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u0686\u06cc\u0633\u062a \u0648 \u0686\u0647 \u0646\u06cc\u0633\u062a.  
\u0647\u0645\u0647 \u0686\u06cc\u0632 \u0628\u0647 \u0648\u0638\u06cc\u0641\u0647 \u0627\u06cc \u06a9\u0647 \u0642\u0631\u0627\u0631 \u0627\u0633\u062a \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f \u0628\u0633\u062a\u06af\u06cc \u062f\u0627\u0631\u062f.<\/p>\n<p>\u062f\u0631 \u0628\u062e\u0634 \u0628\u0639\u062f\u06cc\u060c \u0631\u0648\u0634 \u0627\u0641\u0632\u0648\u062f\u0646 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u0648\u062c\u0648\u062f \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 Gensim \u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u06cc\u062f.<\/p>\n<h3 id=\"addingandremovingstopwordsindefaultgensimstopwordslist\"><span class=\"ez-toc-section\" id=\"%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%d9%88_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d8%af%d8%b1_%d9%81%d9%87%d8%b1%d8%b3%d8%aa_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81_%d9%be%db%8c%d8%b4%e2%80%8c%d9%81%d8%b1%d8%b6_gensim\"><\/span>\u0627\u0641\u0632\u0648\u062f\u0646 \u0648 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 Gensim<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0627\u0628\u062a\u062f\u0627 \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u06cc\u0646\u062f\u0627\u0632\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim\nall_stopwords = gensim.parsing.preprocessing.STOPWORDS\n<span 
class=\"hljs-built_in\">print<\/span>(all_stopwords)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">frozenset({'her', 'during', 'among', 'thereafter', 'only', 'hers', 'in', 'none', 'with', 'un', 'put', 'hence', 'each', 'would', 'have', 'to', 'itself', 'that', 'seeming', 'hereupon', 'someone', 'eight', 'she', 'forty', 'much', 'throughout', 'less', 'was', 'interest', 'elsewhere', 'already', 'whatever', 'or', 'seem', 'fire', 'however', 'keep', 'detail', 'both', 'yourselves', 'indeed', 'enough', 'too', 'us', 'wherein', 'himself', 'behind', 'everything', 'part', 'made', 'thereupon', 'for', 'nor', 'before', 'front', 'sincere', 'really', 'than', 'alone', 'doing', 'amongst', 'across', 'him', 'another', 'some', 'whoever', 'four', 'other', 'latterly', 'off', 'sometime', 'above', 'often', 'herein', 'am', 'whereby', 'although', 'who', 'should', 'amount', 'anyway', 'else', 'upon', 'this', 'when', 'we', 'few', 'anywhere', 'will', 'though', 'being', 'fill', 'used', 'full', 'thru', 'call', 'whereafter', 'various', 'has', 'same', 'former', 'whereas', 'what', 'had', 'mostly', 'onto', 'go', 'could', 'yourself', 'meanwhile', 'beyond', 'beside', 'ours', 'side', 'our', 'five', 'nobody', 'herself', 'is', 'ever', 'they', 'here', 'eleven', 'fifty', 'therefore', 'nothing', 'not', 'mill', 'without', 'whence', 'get', 'whither', 'then', 'no', 'own', 'many', 'anything', 'etc', 'make', 'from', 'against', 'ltd', 'next', 'afterwards', 'unless', 'while', 'thin', 'beforehand', 'by', 'amoungst', 'you', 'third', 'as', 'those', 'done', 'becoming', 'say', 'either', 'doesn', 'twenty', 'his', 'yet', 'latter', 'somehow', 'are', 'these', 'mine', 'under', 'take', 'whose', 'others', 'over', 'perhaps', 'thence', 'does', 'where', 'two', 'always', 'your', 'wherever', 'became', 'which', 'about', 'but', 'towards', 'still', 'rather', 'quite', 'whether', 'somewhere', 'might', 'do', 'bottom', 'until', 'km', 'yours', 'serious', 'find', 'please', 'hasnt', 
'otherwise', 'six', 'toward', 'sometimes', 'of', 'fifteen', 'eg', 'just', 'a', 'me', 'describe', 'why', 'an', 'and', 'may', 'within', 'kg', 'con', 're', 'nevertheless', 'through', 'very', 'anyhow', 'down', 'nowhere', 'now', 'it', 'cant', 'de', 'move', 'hereby', 'how', 'found', 'whom', 'were', 'together', 'again', 'moreover', 'first', 'never', 'below', 'between', 'computer', 'ten', 'into', 'see', 'everywhere', 'there', 'neither', 'every', 'couldnt', 'up', 'several', 'the', 'i', 'becomes', 'don', 'ie', 'been', 'whereupon', 'seemed', 'most', 'noone', 'whole', 'must', 'cannot', 'per', 'my', 'thereby', 'so', 'he', 'name', 'co', 'its', 'everyone', 'if', 'become', 'thick', 'thus', 'regarding', 'didn', 'give', 'all', 'show', 'any', 'using', 'on', 'further', 'around', 'back', 'least', 'since', 'anyone', 'once', 'can', 'bill', 'hereafter', 'be', 'seems', 'their', 'myself', 'nine', 'also', 'system', 'at', 'more', 'out', 'twelve', 'therein', 'almost', 'except', 'last', 'did', 'something', 'besides', 'via', 'whenever', 'formerly', 'cry', 'one', 'hundred', 'sixty', 'after', 'well', 'them', 'namely', 'empty', 'three', 'even', 'along', 'because', 'ourselves', 'such', 'top', 'due', 'inc', 'themselves'})\n<\/code><\/pre>\n<p>\u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u067e\u06cc\u0634 \u0641\u0631\u0636 Gensim \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0645\u0642\u0627\u06cc\u0633\u0647 \u0628\u0627 NLTK \u0628\u0633\u06cc\u0627\u0631 \u062f\u0642\u06cc\u0642 \u062a\u0631 \u0627\u0633\u062a.  
\u0647\u0645\u0686\u0646\u06cc\u0646\u060c Gensim \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 \u0631\u0627 \u062f\u0631 \u06cc\u06a9 \u0634\u06cc\u0621 \u0645\u062c\u0645\u0648\u0639\u0647 \u062b\u0627\u0628\u062a \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f.<\/p>\n<h4 id=\"addingstopwordstodefaultgensimstopwordslist\">\u0627\u0641\u0632\u0648\u062f\u0646 Stop Words \u0628\u0647 \u0641\u0647\u0631\u0633\u062a \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 Gensim Stop Words<\/h4>\n<p>\u0628\u0631\u0627\u06cc \u062f\u0633\u062a\u0631\u0633\u06cc \u0628\u0647 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 Gensim\u060c \u0628\u0627\u06cc\u062f import \u0645\u062c\u0645\u0648\u0639\u0647 \u06cc\u062e \u0632\u062f\u0647 <code>STOPWORDS<\/code> \u0627\u0632 <code>gensim.parsing.preprocessing<\/code> \u0628\u0633\u062a\u0647 \u0628\u0646\u062f\u06cc  \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u0646\u062c\u0645\u062f \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0646\u0648\u0639\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u063a\u06cc\u0631\u0642\u0627\u0628\u0644 \u062a\u063a\u06cc\u06cc\u0631 \u0627\u0633\u062a.  \u0634\u0645\u0627 \u0646\u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0639\u0646\u0627\u0635\u0631 \u0631\u0627 \u062f\u0631 \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062b\u0627\u0628\u062a \u0627\u0636\u0627\u0641\u0647 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  
\u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646\u060c \u0628\u0631\u0627\u06cc \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u06cc\u06a9 \u0639\u0646\u0635\u0631\u060c \u0628\u0627\u06cc\u062f \u0627\u0632 \u0622\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f <code>union<\/code> \u062a\u0627\u0628\u0639 \u0631\u0648\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u0646\u062c\u0645\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062c\u062f\u06cc\u062f \u0645\u0646\u062a\u0642\u0644 \u06a9\u0646\u06cc\u062f.  \u0631\u0627 <code>union<\/code> \u0645\u062a\u062f \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062c\u062f\u06cc\u062f \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f \u06a9\u0647 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062c\u062f\u06cc\u062f \u0627\u0636\u0627\u0641\u0647 \u0634\u062f\u0647 \u0634\u0645\u0627 \u0627\u0633\u062a\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u062f\u0631 \u0632\u06cc\u0631 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u062f <code>likes<\/code> \u0648 <code>play<\/code> \u0628\u0647 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 Gensim:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.parsing.preprocessing <span class=\"hljs-keyword\">import<\/span> STOPWORDS\n\nall_stopwords_gensim = STOPWORDS.union(<span class=\"hljs-built_in\">set<\/span>((<span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>)))\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of 
tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords_gensim]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u0627\u0632 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u0644\u0627 \u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a <code>likes<\/code> \u0648 <code>play<\/code> \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0646\u0638\u0631 \u06af\u0631\u0641\u062a\u0647 \u0634\u062f\u0647 \u0627\u0646\u062f \u0648 \u062f\u0631 \u0646\u062a\u06cc\u062c\u0647 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0646\u062f.<\/p>\n<h4 id=\"removingstopwordsfromdefaultgensimstopwordlist\">\u062d\u0630\u0641 Stop Words \u0627\u0632 \u0641\u0647\u0631\u0633\u062a \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 Gensim Stopword<\/h4>\n<p>\u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 Gensim\u060c \u0628\u0627\u06cc\u062f \u0628\u0627 \u0622\u0646 \u062a\u0645\u0627\u0633 \u0628\u06af\u06cc\u0631\u06cc\u062f <code>difference()<\/code> \u0631\u0648\u0634 \u0631\u0648\u06cc \u0634\u06cc\u0621 
\u0645\u0646\u062c\u0645\u062f \u0645\u062c\u0645\u0648\u0639\u0647\u060c \u06a9\u0647 \u062d\u0627\u0648\u06cc \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0633\u062a.  \u0628\u0627\u06cc\u062f \u0645\u062c\u0645\u0648\u0639\u0647\u200c\u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u06a9\u0647 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u062b\u0627\u0628\u062a \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f \u0628\u0647 \u0622\u0646 \u0627\u0631\u0633\u0627\u0644 \u06a9\u0646\u06cc\u062f <code>difference()<\/code> \u0631\u0648\u0634.  \u0631\u0627 <code>difference()<\/code> \u0645\u062a\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f \u06a9\u0647 \u062d\u0627\u0648\u06cc \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0633\u062a <em>\u0628\u062c\u0632<\/em> \u0622\u0646\u0647\u0627\u06cc\u06cc \u06a9\u0647 \u0628\u0647 <code>difference()<\/code> \u0631\u0648\u0634.<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u062f <code>not<\/code> \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 Gensim:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.parsing.preprocessing <span class=\"hljs-keyword\">import<\/span> STOPWORDS\n\nall_stopwords_gensim = STOPWORDS\nsw_list = {<span class=\"hljs-string\">\"not\"<\/span>}\nall_stopwords_gensim = STOPWORDS.difference(sw_list)\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span 
class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords_gensim]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'not'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 \u06a9\u0644\u0645\u0647 <code>not<\/code> \u0627\u06a9\u0646\u0648\u0646 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a\u060c \u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u067e\u0633 \u0627\u0632 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0646\u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<h3 id=\"usingthespacylibrary\"><span class=\"ez-toc-section\" id=\"%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_spacy\"><\/span>\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 SpaCy<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 SpaCy \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u06cc\u06a9 \u0632\u0628\u0627\u0646 \u0628\u0633\u06cc\u0627\u0631 
\u0645\u0641\u06cc\u062f \u062f\u06cc\u06af\u0631 \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0646\u0635\u0628 SpaCy \u0628\u0627\u06cc\u062f \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f \u0631\u0648\u06cc \u0641\u0631\u0645\u0627\u0646 \u0634\u0645\u0627 terminal:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> pip install -U spacy<\/span>\n<\/code><\/pre>\n<p>\u067e\u0633 \u0627\u0632 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u060c \u0628\u0627\u06cc\u062f \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u0631\u0627 \u0646\u06cc\u0632 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0646\u06cc\u062f.  \u0686\u0646\u062f\u06cc\u0646 \u0645\u062f\u0644 \u062f\u0631 SpaCy \u0628\u0631\u0627\u06cc \u0632\u0628\u0627\u0646 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f.  \u0645\u0627 \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u0631\u0627 \u0646\u0635\u0628 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f.  
\u062f\u0633\u062a\u0648\u0631 \u0632\u06cc\u0631 \u0631\u0627 \u062f\u0631 \u062e\u0648\u062f \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f terminal:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> python -m spacy download en_core_web_sm<\/span>\n<\/code><\/pre>\n<p>\u0647\u0646\u06af\u0627\u0645\u06cc \u06a9\u0647 \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u062f\u0627\u0646\u0644\u0648\u062f \u0634\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 SpaCy \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0627\u0632 \u0645\u062a\u0646 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  \u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> spacy\nsp = spacy.load(<span class=\"hljs-string\">'en_core_web_sm'<\/span>)\n\nall_stopwords = sp.Defaults.stop_words\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw= [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0627\u0628\u062a\u062f\u0627 \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u0631\u0627 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0631\u062f\u0647 \u0648 \u062f\u0631 \u0622\u0646 \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>sp<\/code> \u0645\u062a\u063a\u06cc\u0631.  
\u0631\u0627 <code>sp.Defaults.stop_words<\/code> \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634 \u0641\u0631\u0636 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u062f\u0631 SpaCy \u0627\u0633\u062a.  \u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0645\u0627 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062f\u0631 \u0645\u062a\u0646 \u0648\u0631\u0648\u062f\u06cc \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0627\u06af\u0631 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0645\u062f\u0644 \u0632\u0628\u0627\u0646 SpaCy \u0648\u062c\u0648\u062f \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f\u060c \u06a9\u0644\u0645\u0647 \u062d\u0630\u0641 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u062a:<\/p>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'tennis'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<h3 id=\"addingandremovingstopwordsinspacydefaultstopwordlist\"><span class=\"ez-toc-section\" id=\"%d8%a7%d9%81%d8%b2%d9%88%d8%af%d9%86_%d9%88_%d8%ad%d8%b0%d9%81_stop_words_%d8%af%d8%b1_spacy_default_stop_word_list\"><\/span>\u0627\u0641\u0632\u0648\u062f\u0646 \u0648 \u062d\u0630\u0641 Stop Words \u062f\u0631 SpaCy Default Stop Word List<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0645\u0627\u0646\u0646\u062f \u0633\u0627\u06cc\u0631 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u200c\u0647\u0627\u06cc NLP\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0646\u06cc\u0632 \u0627\u0632 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 \u062f\u0631 Spacy \u0627\u0636\u0627\u0641\u0647 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  \u0627\u0645\u0627 \u0642\u0628\u0644 \u0627\u0632 \u0622\u0646\u060c \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0645\u0648\u062c\u0648\u062f \u062f\u0631 SpaCy \u0631\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f.<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-built_in\">len<\/span>(all_stopwords))\n<span class=\"hljs-built_in\">print<\/span>(all_stopwords)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">326\n{'whence', 'here', 'show', 'were', 'why', 'n\u2019t', 'the', 'whereupon', 'not', 'more', 'how', 'eight', 'indeed', 'i', 'only', 'via', 'nine', 're', 'themselves', 'almost', 'to', 'already', 'front', 'least', 'becomes', 'thereby', 'doing', 'her', 'together', 'be', 'often', 'then', 'quite', 'less', 'many', 'they', 'ourselves', 'take', 'its', 'yours', 'each', 'would', 'may', 'namely', 'do', 'whose', 'whether', 'side', 'both', 'what', 'between', 'toward', 'our', 'whereby', \"'m\", 'formerly', 'myself', 'had', 'really', 'call', 'keep', \"'re\", 'hereupon', 'can', 'their', 'eleven', '\u2019m', 'even', 'around', 'twenty', 'mostly', 'did', 'at', 'an', 'seems', 'serious', 'against', \"n't\", 'except', 'has', 'five', 'he', 'last', '\u2018ve', 'because', 'we', 'himself', 'yet', 'something', 
'somehow', '\u2018m', 'towards', 'his', 'six', 'anywhere', 'us', '\u2018d', 'thru', 'thus', 'which', 'everything', 'become', 'herein', 'one', 'in', 'although', 'sometime', 'give', 'cannot', 'besides', 'across', 'noone', 'ever', 'that', 'over', 'among', 'during', 'however', 'when', 'sometimes', 'still', 'seemed', 'get', \"'ve\", 'him', 'with', 'part', 'beyond', 'everyone', 'same', 'this', 'latterly', 'no', 'regarding', 'elsewhere', 'others', 'moreover', 'else', 'back', 'alone', 'somewhere', 'are', 'will', 'beforehand', 'ten', 'very', 'most', 'three', 'former', '\u2019re', 'otherwise', 'several', 'also', 'whatever', 'am', 'becoming', 'beside', '\u2019s', 'nothing', 'some', 'since', 'thence', 'anyway', 'out', 'up', 'well', 'it', 'various', 'four', 'top', '\u2018s', 'than', 'under', 'might', 'could', 'by', 'too', 'and', 'whom', '\u2018ll', 'say', 'therefore', \"'s\", 'other', 'throughout', 'became', 'your', 'put', 'per', \"'ll\", 'fifteen', 'must', 'before', 'whenever', 'anyone', 'without', 'does', 'was', 'where', 'thereafter', \"'d\", 'another', 'yourselves', 'n\u2018t', 'see', 'go', 'wherever', 'just', 'seeming', 'hence', 'full', 'whereafter', 'bottom', 'whole', 'own', 'empty', 'due', 'behind', 'while', 'onto', 'wherein', 'off', 'again', 'a', 'two', 'above', 'therein', 'sixty', 'those', 'whereas', 'using', 'latter', 'used', 'my', 'herself', 'hers', 'or', 'neither', 'forty', 'thereupon', 'now', 'after', 'yourself', 'whither', 'rather', 'once', 'from', 'until', 'anything', 'few', 'into', 'such', 'being', 'make', 'mine', 'please', 'along', 'hundred', 'should', 'below', 'third', 'unless', 'upon', 'perhaps', 'ours', 'but', 'never', 'whoever', 'fifty', 'any', 'all', 'nobody', 'there', 'have', 'anyhow', 'of', 'seem', 'down', 'is', 'every', '\u2019ll', 'much', 'none', 'further', 'me', 'who', 'nevertheless', 'about', 'everywhere', 'name', 'enough', '\u2019d', 'next', 'meanwhile', 'though', 'through', 'on', 'first', 'been', 'hereby', 'if', 'move', 'so', 
'either', 'amongst', 'for', 'twelve', 'nor', 'she', 'always', 'these', 'as', '\u2019ve', 'amount', '\u2018re', 'someone', 'afterwards', 'you', 'nowhere', 'itself', 'done', 'hereafter', 'within', 'made', 'ca', 'them'}\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 326 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u062f\u0631 \u0644\u06cc\u0633\u062a \u067e\u06cc\u0634 \u0641\u0631\u0636 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 SpaCy \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f.<\/p>\n<h4 id=\"addingstopwordstodefaultspacystopwordslist\">\u0627\u0641\u0632\u0648\u062f\u0646 Stop Words \u0628\u0647 \u0641\u0647\u0631\u0633\u062a \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 SpaCy Stop Words<\/h4>\n<p>\u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 SpaCy \u0627\u0633\u0627\u0633\u0627\u064b \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u0627\u0633\u062a.  
\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u062c\u062f\u06cc\u062f \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u0647\u0631 \u0645\u0648\u0631\u062f \u062c\u062f\u06cc\u062f\u06cc \u0631\u0627 \u0628\u0647 \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f \u06a9\u0647 \u062f\u0631 \u0622\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>tennis<\/code> \u0628\u0647 \u0644\u06cc\u0633\u062a \u0645\u0648\u062c\u0648\u062f \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 Spacy:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> spacy\nsp = spacy.load(<span class=\"hljs-string\">'en_core_web_sm'<\/span>)\n\nall_stopwords = sp.Defaults.stop_words\nall_stopwords.add(<span class=\"hljs-string\">\"tennis\"<\/span>)\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'likes'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span 
class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0647 <code>tennis<\/code> \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0645\u0627\u0646\u0646\u062f \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0686\u0646\u062f\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0628\u0647 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 SpaCy \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f.  \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u062f <code>likes<\/code> \u0648 <code>tennis<\/code> \u0628\u0647 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 SpaCy:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> spacy\nsp = spacy.load(<span class=\"hljs-string\">'en_core_web_sm'<\/span>)\n\nall_stopwords = sp.Defaults.stop_words\nall_stopwords |= {<span class=\"hljs-string\">\"likes\"<\/span>,<span class=\"hljs-string\">\"tennis\"<\/span>,}\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span 
class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f <code>likes<\/code> \u0648 <code>tennis<\/code> \u0647\u0631 \u062f\u0648 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0646\u062f.<\/p>\n<h4 id=\"removingstopwordsfromdefaultspacystopwordslist\">\u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0641\u0647\u0631\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 SpaCy<\/h4>\n<p>\u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 SpaCy\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06a9\u0644\u0645\u0647 \u062d\u0630\u0641 \u0631\u0627 \u0628\u0647 \u0622\u0646 \u0645\u0646\u062a\u0642\u0644 \u06a9\u0646\u06cc\u062f <code>remove<\/code> \u0631\u0648\u0634 \u0645\u062c\u0645\u0648\u0639\u0647<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u062f <code>not<\/code> \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 SpaCy:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> spacy\nsp = spacy.load(<span 
class=\"hljs-string\">'en_core_web_sm'<\/span>)\n\nall_stopwords = sp.Defaults.stop_words\nall_stopwords.remove(<span class=\"hljs-string\">'not'<\/span>)\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\ntext_tokens = word_tokenize(text)\ntokens_without_sw = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> all_stopwords]\n\n<span class=\"hljs-built_in\">print<\/span>(tokens_without_sw)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">[<span class=\"hljs-string\">'Nick'<\/span>, <span class=\"hljs-string\">'play'<\/span>, <span class=\"hljs-string\">'football'<\/span>, <span class=\"hljs-string\">','<\/span>, <span class=\"hljs-string\">'not'<\/span>, <span class=\"hljs-string\">'fond'<\/span>, <span class=\"hljs-string\">'.'<\/span>]\n<\/code><\/pre>\n<p>\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f <code>not<\/code> \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0646\u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<h3 id=\"usingcustomscripttoremovestopwords\"><span class=\"ez-toc-section\" id=\"%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d8%a7%d8%b3%da%a9%d8%b1%db%8c%d9%be%d8%aa_%d8%b3%d9%81%d8%a7%d8%b1%d8%b4%db%8c_%d8%a8%d8%b1%d8%a7%db%8c_%d8%ad%d8%b0%d9%81_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%aa%d9%88%d9%82%d9%81\"><\/span>\u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0633\u0641\u0627\u0631\u0634\u06cc \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062f\u0631 \u0628\u062e\u0634 \u0642\u0628\u0644\u060c \u0631\u0648\u0634 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u06cc\u06a9 \u0631\u0634\u062a\u0647 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0631\u062f\u06cc\u062f.  \u0627\u06af\u0631 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0646\u062a\u0631\u0644 \u06a9\u0627\u0645\u0644 \u0628\u0631 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0647 \u062a\u0648\u0642\u0641 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u062e\u0648\u062f \u0631\u0627 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u062e\u0648\u062f \u0628\u0646\u0648\u06cc\u0633\u06cc\u062f.<\/p>\n<p>\u0627\u0648\u0644\u06cc\u0646 \u0642\u062f\u0645 \u062f\u0631 \u0627\u06cc\u0646 \u0632\u0645\u06cc\u0646\u0647\u060c \u062a\u0639\u0631\u06cc\u0641 \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062f\u0631 \u0646\u0638\u0631 \u06af\u0631\u0641\u062a\u0647 \u0634\u0648\u0646\u062f.  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0641\u0647\u0631\u0633\u062a\u06cc \u0627\u0632 \u0628\u0631\u062e\u06cc \u0627\u0632 \u0645\u062a\u062f\u0627\u0648\u0644 \u062a\u0631\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0631\u0627 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">my_stopwords = [<span class=\"hljs-string\">'i'<\/span>, <span class=\"hljs-string\">'me'<\/span>, <span class=\"hljs-string\">'my'<\/span>, <span class=\"hljs-string\">'myself'<\/span>, <span class=\"hljs-string\">'we'<\/span>, <span class=\"hljs-string\">'our'<\/span>, <span class=\"hljs-string\">'ours'<\/span>, <span class=\"hljs-string\">'ourselves'<\/span>, <span class=\"hljs-string\">'you'<\/span>, <span class=\"hljs-string\">\"you're\"<\/span>, <span class=\"hljs-string\">\"you've\"<\/span>, <span class=\"hljs-string\">\"you'll\"<\/span>, <span class=\"hljs-string\">\"you'd\"<\/span>, <span class=\"hljs-string\">'your'<\/span>, <span class=\"hljs-string\">'yours'<\/span>, <span class=\"hljs-string\">'yourself'<\/span>, <span class=\"hljs-string\">'yourselves'<\/span>, <span class=\"hljs-string\">'he'<\/span>, <span class=\"hljs-string\">'him'<\/span>, <span class=\"hljs-string\">'his'<\/span>, <span class=\"hljs-string\">'himself'<\/span>, <span class=\"hljs-string\">'she'<\/span>, <span class=\"hljs-string\">\"she's\"<\/span>, <span class=\"hljs-string\">'her'<\/span>, <span class=\"hljs-string\">'hers'<\/span>, <span class=\"hljs-string\">'herself'<\/span>, <span class=\"hljs-string\">'it'<\/span>, <span class=\"hljs-string\">\"it's\"<\/span>, <span class=\"hljs-string\">'its'<\/span>, <span class=\"hljs-string\">'itself'<\/span>, <span class=\"hljs-string\">'they'<\/span>, <span class=\"hljs-string\">'them'<\/span>, <span class=\"hljs-string\">'their'<\/span>, <span class=\"hljs-string\">'theirs'<\/span>, <span class=\"hljs-string\">'themselves'<\/span>, <span 
class=\"hljs-string\">'what'<\/span>, <span class=\"hljs-string\">'which'<\/span>, <span class=\"hljs-string\">'who'<\/span>, <span class=\"hljs-string\">'whom'<\/span>, <span class=\"hljs-string\">'this'<\/span>, <span class=\"hljs-string\">'that'<\/span>, <span class=\"hljs-string\">\"that'll\"<\/span>, <span class=\"hljs-string\">'these'<\/span>, <span class=\"hljs-string\">'those'<\/span>, <span class=\"hljs-string\">'am'<\/span>, <span class=\"hljs-string\">'is'<\/span>, <span class=\"hljs-string\">'are'<\/span>, <span class=\"hljs-string\">'was'<\/span>, <span class=\"hljs-string\">'were'<\/span>, <span class=\"hljs-string\">'be'<\/span>, <span class=\"hljs-string\">'been'<\/span>, <span class=\"hljs-string\">'being'<\/span>, <span class=\"hljs-string\">'have'<\/span>, <span class=\"hljs-string\">'has'<\/span>, <span class=\"hljs-string\">'had'<\/span>, <span class=\"hljs-string\">'having'<\/span>, <span class=\"hljs-string\">'do'<\/span>, <span class=\"hljs-string\">'does'<\/span>, <span class=\"hljs-string\">'did'<\/span>, <span class=\"hljs-string\">'doing'<\/span>, <span class=\"hljs-string\">'a'<\/span>, <span class=\"hljs-string\">'an'<\/span>, <span class=\"hljs-string\">'the'<\/span>, <span class=\"hljs-string\">'and'<\/span>, <span class=\"hljs-string\">'but'<\/span>, <span class=\"hljs-string\">'if'<\/span>, <span class=\"hljs-string\">'or'<\/span>, <span class=\"hljs-string\">'because'<\/span>, <span class=\"hljs-string\">'as'<\/span>, <span class=\"hljs-string\">'until'<\/span>, <span class=\"hljs-string\">'while'<\/span>, <span class=\"hljs-string\">'of'<\/span>, <span class=\"hljs-string\">'at'<\/span>, <span class=\"hljs-string\">'by'<\/span>, <span class=\"hljs-string\">'for'<\/span>, <span class=\"hljs-string\">'with'<\/span>, <span class=\"hljs-string\">'about'<\/span>, <span class=\"hljs-string\">'against'<\/span>, <span class=\"hljs-string\">'between'<\/span>, <span class=\"hljs-string\">'into'<\/span>, <span 
class=\"hljs-string\">'through'<\/span>, <span class=\"hljs-string\">'during'<\/span>, <span class=\"hljs-string\">'before'<\/span>, <span class=\"hljs-string\">'after'<\/span>, <span class=\"hljs-string\">'above'<\/span>, <span class=\"hljs-string\">'below'<\/span>, <span class=\"hljs-string\">'to'<\/span>, <span class=\"hljs-string\">'from'<\/span>, <span class=\"hljs-string\">'up'<\/span>, <span class=\"hljs-string\">'down'<\/span>, <span class=\"hljs-string\">'in'<\/span>, <span class=\"hljs-string\">'out'<\/span>, <span class=\"hljs-string\">'on'<\/span>, <span class=\"hljs-string\">'off'<\/span>, <span class=\"hljs-string\">'over'<\/span>, <span class=\"hljs-string\">'under'<\/span>, <span class=\"hljs-string\">'again'<\/span>, <span class=\"hljs-string\">'further'<\/span>, <span class=\"hljs-string\">'then'<\/span>, <span class=\"hljs-string\">'once'<\/span>, <span class=\"hljs-string\">'here'<\/span>, <span class=\"hljs-string\">'there'<\/span>, <span class=\"hljs-string\">'when'<\/span>, <span class=\"hljs-string\">'where'<\/span>, <span class=\"hljs-string\">'why'<\/span>, <span class=\"hljs-string\">'how'<\/span>, <span class=\"hljs-string\">'all'<\/span>, <span class=\"hljs-string\">'any'<\/span>, <span class=\"hljs-string\">'both'<\/span>, <span class=\"hljs-string\">'each'<\/span>, <span class=\"hljs-string\">'few'<\/span>, <span class=\"hljs-string\">'more'<\/span>, <span class=\"hljs-string\">'most'<\/span>, <span class=\"hljs-string\">'other'<\/span>, <span class=\"hljs-string\">'some'<\/span>, <span class=\"hljs-string\">'such'<\/span>, <span class=\"hljs-string\">'no'<\/span>, <span class=\"hljs-string\">'nor'<\/span>, <span class=\"hljs-string\">'not'<\/span>, <span class=\"hljs-string\">'only'<\/span>, <span class=\"hljs-string\">'own'<\/span>, <span class=\"hljs-string\">'same'<\/span>, <span class=\"hljs-string\">'so'<\/span>, <span class=\"hljs-string\">'than'<\/span>, <span class=\"hljs-string\">'too'<\/span>, <span 
class=\"hljs-string\">'very'<\/span>, <span class=\"hljs-string\">'s'<\/span>, <span class=\"hljs-string\">'t'<\/span>, <span class=\"hljs-string\">'can'<\/span>, <span class=\"hljs-string\">'will'<\/span>, <span class=\"hljs-string\">'just'<\/span>, <span class=\"hljs-string\">'don'<\/span>, <span class=\"hljs-string\">\"don't\"<\/span>, <span class=\"hljs-string\">'should'<\/span>, <span class=\"hljs-string\">\"should've\"<\/span>, <span class=\"hljs-string\">'now'<\/span>, <span class=\"hljs-string\">'d'<\/span>, <span class=\"hljs-string\">'ll'<\/span>, <span class=\"hljs-string\">'m'<\/span>, <span class=\"hljs-string\">'o'<\/span>, <span class=\"hljs-string\">'re'<\/span>, <span class=\"hljs-string\">'ve'<\/span>, <span class=\"hljs-string\">'y'<\/span>, <span class=\"hljs-string\">'ain'<\/span>, <span class=\"hljs-string\">'aren'<\/span>, <span class=\"hljs-string\">\"aren't\"<\/span>, <span class=\"hljs-string\">'couldn'<\/span>, <span class=\"hljs-string\">\"couldn't\"<\/span>, <span class=\"hljs-string\">'didn'<\/span>, <span class=\"hljs-string\">\"didn't\"<\/span>, <span class=\"hljs-string\">'doesn'<\/span>, <span class=\"hljs-string\">\"doesn't\"<\/span>, <span class=\"hljs-string\">'hadn'<\/span>, <span class=\"hljs-string\">\"hadn't\"<\/span>, <span class=\"hljs-string\">'hasn'<\/span>, <span class=\"hljs-string\">\"hasn't\"<\/span>, <span class=\"hljs-string\">'haven'<\/span>, <span class=\"hljs-string\">\"haven't\"<\/span>, <span class=\"hljs-string\">'isn'<\/span>, <span class=\"hljs-string\">\"isn't\"<\/span>, <span class=\"hljs-string\">'ma'<\/span>, <span class=\"hljs-string\">'mightn'<\/span>, <span class=\"hljs-string\">\"mightn't\"<\/span>, <span class=\"hljs-string\">'mustn'<\/span>, <span class=\"hljs-string\">\"mustn't\"<\/span>, <span class=\"hljs-string\">'needn'<\/span>, <span class=\"hljs-string\">\"needn't\"<\/span>, <span class=\"hljs-string\">'shan'<\/span>, <span class=\"hljs-string\">\"shan't\"<\/span>, <span 
class=\"hljs-string\">'shouldn'<\/span>, <span class=\"hljs-string\">\"shouldn't\"<\/span>, <span class=\"hljs-string\">'wasn'<\/span>, <span class=\"hljs-string\">\"wasn't\"<\/span>, <span class=\"hljs-string\">'weren'<\/span>, <span class=\"hljs-string\">\"weren't\"<\/span>, <span class=\"hljs-string\">'won'<\/span>, <span class=\"hljs-string\">\"won't\"<\/span>, <span class=\"hljs-string\">'wouldn'<\/span>, <span class=\"hljs-string\">\"wouldn't\"<\/span>]\n<\/code><\/pre>\n<p>\u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u062a\u0627\u0628\u0639\u06cc \u0631\u0627 \u062a\u0639\u0631\u06cc\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u06cc\u06a9 \u0631\u0634\u062a\u0647 \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u06cc \u067e\u0630\u06cc\u0631\u062f \u0648 \u062c\u0645\u0644\u0647 \u0631\u0627 \u0628\u062f\u0648\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0628\u0627\u0632 \u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">remove_mystopwords<\/span>(<span class=\"hljs-params\">sentence<\/span>):<\/span>\n    tokens = sentence.split(<span class=\"hljs-string\">\" \"<\/span>)\n    tokens_filtered= [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> text_tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-keyword\">not<\/span> word <span class=\"hljs-keyword\">in<\/span> my_stopwords]\n    <span class=\"hljs-keyword\">return<\/span> (<span class=\"hljs-string\">\" \"<\/span>).join(tokens_filtered)\n<\/code><\/pre>\n<p>\u0627\u06a9\u0646\u0648\u0646 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0633\u0639\u06cc \u06a9\u0646\u06cc\u0645 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0627\u0632 \u06cc\u06a9 \u062c\u0645\u0644\u0647 
\u0646\u0645\u0648\u0646\u0647 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">text = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\nfiltered_text = remove_mystopwords(text)\n<span class=\"hljs-built_in\">print<\/span>(filtered_text)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">Nick likes play , however fond tennis .\n<\/code><\/pre>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641\u06cc \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u062f\u0631 \u0622\u0646 \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f <code>my_stopwords<\/code> \u0644\u06cc\u0633\u062a \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0627\u0632 \u0622\u0646\u062c\u0627 \u06a9\u0647 <code>my_stopwords<\/code> \u0644\u06cc\u0633\u062a \u06cc\u06a9 \u0644\u06cc\u0633\u062a \u0633\u0627\u062f\u0647 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u0627\u0633\u062a\u060c \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0622\u0646 \u0627\u0636\u0627\u0641\u0647 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f.  
\u0645\u062b\u0644\u0627\u064b \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u0645 <code>football<\/code> \u062f\u0631 \u0644\u06cc\u0633\u062a <code>my_stopwords<\/code> \u0648 \u062f\u0648\u0628\u0627\u0631\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">text = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\nfiltered_text = remove_mystopwords(text)\n<span class=\"hljs-built_in\">print<\/span>(filtered_text)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">Nick likes play , however fond tennis .\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0627\u06a9\u0646\u0648\u0646 \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f <code>football<\/code> \u0647\u0645\u0686\u0646\u06cc\u0646 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062d\u0630\u0641 \u0645\u06cc \u0634\u0648\u062f \u0632\u06cc\u0631\u0627 \u0645\u0627 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062f\u0631 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0633\u0641\u0627\u0631\u0634\u06cc \u062e\u0648\u062f \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u06cc\u0645.<\/p>\n<p>\u062d\u0627\u0644\u0627 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u06a9\u0644\u0645\u0647 \u0631\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u0645 <code>football<\/code> \u0627\u0632 \u0644\u06cc\u0633\u062a stop word \u0648 \u062f\u0648\u0628\u0627\u0631\u0647 \u062d\u0630\u0641 stop word \u0631\u0627 \u0628\u0647 \u062c\u0645\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u062e\u0648\u062f \u0627\u0639\u0645\u0627\u0644 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code 
class=\"hljs\">my_stopwords.remove(<span class=\"hljs-string\">\"football\"<\/span>)\n\ntext = <span class=\"hljs-string\">\"Nick likes to play football, however he is not too fond of tennis.\"<\/span>\nfiltered_text = remove_mystopwords(text)\n<span class=\"hljs-built_in\">print<\/span>(filtered_text)\n<\/code><\/pre>\n<p><strong>\u062e\u0631\u0648\u062c\u06cc:<\/strong><\/p>\n<pre><code class=\"hljs\">Nick likes play football , however fond tennis .\n<\/code><\/pre>\n<p>\u06a9\u0644\u0645\u0647 <code>football<\/code> \u0627\u0632 \u0632\u0645\u0627\u0646\u06cc \u06a9\u0647 \u0645\u0627 \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0644\u06cc\u0633\u062a \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062e\u0648\u062f \u062d\u0630\u0641 \u06a9\u0631\u062f\u06cc\u0645\u060c \u0627\u06a9\u0646\u0648\u0646 \u062d\u0630\u0641 \u0646\u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<h3 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0631\u062f\u06cc\u062f \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0627\u0632 \u0622\u0646\u0647\u0627 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u06cc\u06a9 \u0631\u0634\u062a\u0647 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.  
\u0647\u0645\u0686\u0646\u06cc\u0646 \u0631\u0648\u0634 \u0627\u0641\u0632\u0648\u062f\u0646 \u06cc\u0627 \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0627\u0632 \u0644\u06cc\u0633\u062a \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u067e\u06cc\u0634 \u0641\u0631\u0636 \u0627\u0631\u0627\u0626\u0647 \u0634\u062f\u0647 \u062a\u0648\u0633\u0637 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0631\u062f\u06cc\u062f.  \u062f\u0631 \u067e\u0627\u06cc\u0627\u0646\u060c \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u06cc\u0645 \u06a9\u0647 \u0627\u06af\u0631 \u0627\u0632 \u06cc\u06a9 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0633\u0641\u0627\u0631\u0634\u06cc \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u062f\u060c \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u062f\u0627\u062f.<\/p>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=[];t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)[0];\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 
\u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-18 01:11:05<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;15863&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        
<\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 13<\/span> <span class=\"rt-label 
rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0642\u0635\u062f \u062f\u0627\u0631\u06cc\u062f \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0632 \u0631\u0634\u062a\u0647 \u0647\u0627 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f. \u06a9\u0644\u0645\u0627\u062a Stop \u0622\u0646 \u062f\u0633\u062a\u0647 \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u062f\u0631 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0647\u0633\u062a\u0646\u062f \u06a9\u0647 \u0645\u0639\u0646\u06cc \u0628\u0633\u06cc\u0627\u0631 \u06a9\u0645\u06cc \u062f\u0627\u0631\u0646\u062f\u060c \u0645\u0627\u0646\u0646\u062f &#8220;is&#8221;\u060c &#8220;an&#8221;\u060c &#8220;the&#8221;\u060c \u0648 \u063a\u06cc\u0631\u0647. 
\u0645\u0648\u062a\u0648\u0631\u0647\u0627\u06cc \u062c\u0633\u062a\u062c\u0648 \u0648 \u0633\u0627\u06cc\u0631 \u067e\u0644\u062a\u0641\u0631\u0645\u200c\u0647\u0627\u06cc \u0646\u0645\u0627\u06cc\u0647\u200c\u0633\u0627\u0632\u06cc \u0633\u0627\u0632\u0645\u0627\u0646\u06cc \u0627\u063a\u0644\u0628 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0631\u0627 \u0641\u06cc\u0644\u062a\u0631 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f \u062f\u0631 [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":9162,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620],"tags":[],"class_list":["post-15863","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/15863","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=15863"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/15863\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/9162"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=15863"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=15863"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=15863"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}