{"id":16288,"date":"2024-01-23T02:48:14","date_gmt":"2024-01-22T23:18:14","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/"},"modified":"2024-01-23T02:48:14","modified_gmt":"2024-01-22T23:18:14","slug":"%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/","title":{"rendered":"\u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP: \u06a9\u0627\u0631 \u0628\u0627 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim (\u0642\u0633\u0645\u062a 1)"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d9%86%d8%b5%d8%a8_%d8%ac%d9%86%d8%b3%db%8c%d9%85\" >\u0646\u0635\u0628 \u062c\u0646\u0633\u06cc\u0645<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%81%d8%b1%d9%87%d9%86%da%af_%d9%84%d8%ba%d8%aa\" >\u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d8%af%db%8c%da%a9%d8%b4%d9%86%d8%b1%db%8c_%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d8%a7%d8%b4%db%8c%d8%a7%d8%a1_%d8%af%d8%b1%d9%88%d9%86_%d8%ad%d8%a7%d9%81%d8%b8%d9%87\" >\u0627\u06cc\u062c\u0627\u062f \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0634\u06cc\u0627\u0621 \u062f\u0631\u0648\u0646 \u062d\u0627\u0641\u0638\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%81%d8%b1%d9%87%d9%86%da%af_%d9%84%d8%ba%d8%aa_%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d9%81%d8%a7%db%8c%d9%84_%d9%87%d8%a7%db%8c_%d9%85%d8%aa%d9%86%db%8c\" >\u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 
ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa\" >\u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%da%a9%db%8c%d8%b3%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%a7%d8%b2_%d8%a7%d8%b4%db%8c%d8%a7%d8%a1_%d8%af%d8%b1%d9%88%d9%86_%d8%ad%d8%a7%d9%81%d8%b8%d9%87\" >\u0627\u06cc\u062c\u0627\u062f \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0627\u0634\u06cc\u0627\u0621 \u062f\u0631\u0648\u0646 \u062d\u0627\u0641\u0638\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%da%a9%db%8c%d8%b3%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%a7%d8%b2_%d9%81%d8%a7%db%8c%d9%84_%d9%87%d8%a7%db%8c_%d9%85%d8%aa%d9%86%db%8c\" >\u0627\u06cc\u062c\u0627\u062f \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_tf-idf_corpus\" >\u0627\u06cc\u062c\u0627\u062f TF-IDF Corpus<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d8%af%d8%a7%d9%86%d9%84%d9%88%d8%af_%d9%85%d8%af%d9%84%e2%80%8c%d9%87%d8%a7_%d9%88_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%d8%af%d8%a7%d8%af%d9%87%e2%80%8c%d9%87%d8%a7%db%8c_%d8%af%d8%a7%d8%ae%d9%84%db%8c_gensim\" >\u062f\u0627\u0646\u0644\u0648\u062f \u0645\u062f\u0644\u200c\u0647\u0627 \u0648 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u062f\u0627\u062e\u0644\u06cc Gensim<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d9%82%d8%b3%d9%85%d8%aa-1\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 13<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<p>\u0627\u06cc\u0646 \u062f\u0647\u0645\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0627\u0632 \u0633\u0631\u06cc 
\u0645\u0642\u0627\u0644\u0627\u062a \u0645\u0646 \u0627\u0633\u062a \u0631\u0648\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP.  \u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0642\u0628\u0644\u06cc \u062e\u0648\u062f \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 <a target=\"_blank\" rel=\"nofollow noopener\" href=\"https:\/\/stanfordnlp.github.io\/CoreNLP\/index.html\">StanfordCoreNLP<\/a> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0648\u0638\u0627\u06cc\u0641 \u0645\u062e\u062a\u0644\u0641 NLP \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0634\u0648\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0628\u0647 \u0628\u0631\u0631\u0633\u06cc \u0622\u0646 \u0645\u06cc \u067e\u0631\u062f\u0627\u0632\u06cc\u0645 <a target=\"_blank\" rel=\"nofollow noopener\" href=\"https:\/\/radimrehurek.com\/gensim\/intro.html\">\u062c\u0646\u0633\u06cc\u0645<\/a> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u060c \u06a9\u0647 \u06cc\u06a9\u06cc \u062f\u06cc\u06af\u0631 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0628\u0633\u06cc\u0627\u0631 \u0645\u0641\u06cc\u062f NLP \u0628\u0631\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a.  Gensim \u062f\u0631 \u062f\u0631\u062c\u0647 \u0627\u0648\u0644 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639\u06cc \u062a\u0648\u0633\u0639\u0647 \u062f\u0627\u062f\u0647 \u0634\u062f.  
\u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u0627\u06a9\u0646\u0648\u0646 \u0627\u0632 \u0627\u0646\u0648\u0627\u0639 \u0648\u0638\u0627\u06cc\u0641 NLP \u062f\u06cc\u06af\u0631 \u0645\u0627\u0646\u0646\u062f \u062a\u0628\u062f\u06cc\u0644 \u06a9\u0644\u0645\u0627\u062a \u0628\u0647 \u0628\u0631\u062f\u0627\u0631 (word2vec)\u060c \u0633\u0646\u062f \u0628\u0647 \u0628\u0631\u062f\u0627\u0631 (doc2vec)\u060c \u06cc\u0627\u0641\u062a\u0646 \u0634\u0628\u0627\u0647\u062a \u0645\u062a\u0646 \u0648 \u062e\u0644\u0627\u0635\u0647 \u0633\u0627\u0632\u06cc \u0645\u062a\u0646 \u067e\u0634\u062a\u06cc\u0628\u0627\u0646\u06cc \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0648 \u0645\u0642\u0627\u0644\u0647 \u0628\u0639\u062f\u06cc \u0627\u06cc\u0646 \u0645\u062c\u0645\u0648\u0639\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0627\u06cc\u0646 \u06a9\u0627\u0631\u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<h2 id=\"installinggensim\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%b5%d8%a8_%d8%ac%d9%86%d8%b3%db%8c%d9%85\"><\/span>\u0646\u0635\u0628 \u062c\u0646\u0633\u06cc\u0645<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0627\u06af\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u062f pip \u0646\u0635\u0628 \u06a9\u0646\u0646\u062f\u0647 \u0628\u0631\u0627\u06cc \u0646\u0635\u0628 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u062e\u0648\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u062f\u0633\u062a\u0648\u0631 \u0632\u06cc\u0631 \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 
Gensim \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> pip install gensim<\/span>\n<\/code><\/pre>\n<p>\u0647\u0645\u0686\u0646\u06cc\u0646\u060c \u0627\u06af\u0631 \u0627\u0632 \u062a\u0648\u0632\u06cc\u0639 Anaconda \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u062f\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u062f\u0633\u062a\u0648\u0631 \u0632\u06cc\u0631 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0646\u0635\u0628 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c anaconda gensim<\/span>\n<\/code><\/pre>\n<p>\u062d\u0627\u0644\u0627 \u0628\u0628\u06cc\u0646\u06cc\u0645 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0648\u0638\u0627\u06cc\u0641 \u0645\u062e\u062a\u0644\u0641 NLP \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u0645.<\/p>\n<h2 id=\"creatingdictionaries\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%81%d8%b1%d9%87%d9%86%da%af_%d9%84%d8%ba%d8%aa\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645 \u0647\u0627\u06cc \u0622\u0645\u0627\u0631\u06cc \u0628\u0627 \u0627\u0639\u062f\u0627\u062f \u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u0646\u062f\u060c \u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u0632\u0628\u0627\u0646 \u0647\u0627\u06cc \u0637\u0628\u06cc\u0639\u06cc \u062d\u0627\u0648\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc\u06cc 
\u0628\u0647 \u0634\u06a9\u0644 \u0645\u062a\u0646 \u0647\u0633\u062a\u0646\u062f.  \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0645\u06a9\u0627\u0646\u06cc\u0632\u0645\u06cc \u0628\u0631\u0627\u06cc \u062a\u0628\u062f\u06cc\u0644 \u06a9\u0644\u0645\u0627\u062a \u0628\u0647 \u0627\u0639\u062f\u0627\u062f \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632 \u0627\u0633\u062a.  \u0628\u0647 \u0637\u0648\u0631 \u0645\u0634\u0627\u0628\u0647\u060c \u067e\u0633 \u0627\u0632 \u0627\u0639\u0645\u0627\u0644 \u0627\u0646\u0648\u0627\u0639 \u0645\u062e\u062a\u0644\u0641 \u0641\u0631\u0622\u06cc\u0646\u062f\u0647\u0627 \u0631\u0648\u06cc \u0627\u0639\u062f\u0627\u062f\u060c \u0645\u0627 \u0628\u0627\u06cc\u062f \u0627\u0639\u062f\u0627\u062f \u0631\u0627 \u0628\u0647 \u0645\u062a\u0646 \u062a\u0628\u062f\u06cc\u0644 \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u06cc\u06a9\u06cc \u0627\u0632 \u0631\u0627\u0647\u200c\u0647\u0627\u06cc \u062f\u0633\u062a\u06cc\u0627\u0628\u06cc \u0628\u0647 \u0627\u06cc\u0646 \u0646\u0648\u0639 \u0639\u0645\u0644\u06a9\u0631\u062f\u060c \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0627\u0633\u062a \u06a9\u0647 \u06cc\u06a9 \u0634\u0646\u0627\u0633\u0647 \u0639\u062f\u062f\u06cc \u0631\u0627 \u0628\u0647 \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0645\u0646\u062d\u0635\u0631\u0628\u0647\u200c\u0641\u0631\u062f \u062f\u0631 \u0633\u0646\u062f \u0627\u062e\u062a\u0635\u0627\u0635 \u0645\u06cc\u200c\u062f\u0647\u062f.  
\u0633\u067e\u0633 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0627\u0632 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0628\u0631\u0627\u06cc \u06cc\u0627\u0641\u062a\u0646 \u0645\u0639\u0627\u062f\u0644 \u0639\u062f\u062f\u06cc \u06cc\u06a9 \u06a9\u0644\u0645\u0647 \u0648 \u0628\u0627\u0644\u0639\u06a9\u0633 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.<\/p>\n<h3 id=\"creatingdictionariesusinginmemoryobjects\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d8%af%db%8c%da%a9%d8%b4%d9%86%d8%b1%db%8c_%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d8%a7%d8%b4%db%8c%d8%a7%d8%a1_%d8%af%d8%b1%d9%88%d9%86_%d8%ad%d8%a7%d9%81%d8%b8%d9%87\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0634\u06cc\u0627\u0621 \u062f\u0631\u0648\u0646 \u062d\u0627\u0641\u0638\u0647<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 ID \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646 \u0646\u06af\u0627\u0634\u062a \u0645\u06cc \u06a9\u0646\u062f\u060c \u0628\u0633\u06cc\u0627\u0631 \u0622\u0633\u0627\u0646 \u0627\u0633\u062a.  
\u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim\n<span class=\"hljs-keyword\">from<\/span> gensim <span class=\"hljs-keyword\">import<\/span> corpora\n<span class=\"hljs-keyword\">from<\/span> pprint <span class=\"hljs-keyword\">import<\/span> pprint\n\ntext = [<span class=\"hljs-string\">\"\"\"In computer science, artificial intelligence (AI),\n             sometimes called machine intelligence, is intelligence\n             demonstrated by machines, in contrast to the natural intelligence\n             displayed by humans and animals. Computer science defines\n             AI research as the study of intelligent agents: any device that\n             perceives its environment and takes actions that maximize its chance\n             of successfully achieving its goals.\"\"\"<\/span>]\n\ntokens = [[token <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> sentence.split()] <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> text]\ngensim_dictionary = corpora.Dictionary(tokens)\n\n<span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">\"The dictionary has: \"<\/span> +<span class=\"hljs-built_in\">str<\/span>(<span class=\"hljs-built_in\">len<\/span>(gensim_dictionary)) + <span class=\"hljs-string\">\" tokens\"<\/span>)\n\n<span class=\"hljs-keyword\">for<\/span> k, v <span class=\"hljs-keyword\">in<\/span> gensim_dictionary.token2id.items():\n    <span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">f'<span class=\"hljs-subst\">{k:{<span class=\"hljs-number\">15<\/span>}<\/span>} <span class=\"hljs-subst\">{v:{<span class=\"hljs-number\">10<\/span>}<\/span>}'<\/span>)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u0627 
\u0627\u0628\u062a\u062f\u0627 import \u0631\u0627 <code>gensim<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 <code>corpora<\/code> \u0645\u0627\u0698\u0648\u0644 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647  \u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0645\u062a\u0646\u06cc \u062f\u0627\u0631\u06cc\u0645 (\u06a9\u0647 \u0642\u0633\u0645\u062a \u0627\u0648\u0644 \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0627\u0648\u0644 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0627\u0633\u062a \u0631\u0648\u06cc \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc) \u0630\u062e\u06cc\u0631\u0647 \u0634\u062f\u0647 \u062f\u0631 <code>text<\/code> \u0645\u062a\u063a\u06cc\u0631.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a\u060c \u0645\u0627 \u0628\u0647 \u0644\u06cc\u0633\u062a\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0645\u062a\u0646 \u062e\u0648\u062f (\u06a9\u0647 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0646\u0634\u0627\u0646\u0647 \u0646\u06cc\u0632 \u0634\u0646\u0627\u062e\u062a\u0647 \u0645\u06cc \u0634\u0648\u062f) \u0646\u06cc\u0627\u0632 \u062f\u0627\u0631\u06cc\u0645.  
\u062f\u0631 \u062e\u0637 \u0632\u06cc\u0631\u060c \u0633\u0646\u062f \u062e\u0648\u062f \u0631\u0627 \u0628\u0647 \u062c\u0645\u0644\u0627\u062a \u0648 \u0633\u067e\u0633 \u062c\u0645\u0644\u0627\u062a \u0631\u0627 \u0628\u0647 \u06a9\u0644\u0645\u0627\u062a \u062a\u0642\u0633\u06cc\u0645 \u0645\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<pre><code class=\"hljs\">tokens = [[token <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> sentence.split()] <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> text]\n<\/code><\/pre>\n<p>\u0645\u0627 \u0627\u06a9\u0646\u0648\u0646 \u0622\u0645\u0627\u062f\u0647 \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u062e\u0648\u062f \u0647\u0633\u062a\u06cc\u0645.  \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0634\u06cc\u0621 <code>Dictionary<\/code> \u0627\u0632 \u0645\u0627\u0698\u0648\u0644 <code>corpora<\/code> \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645 \u0648 \u0644\u06cc\u0633\u062a \u062a\u0648\u06a9\u0646 \u0647\u0627 \u0631\u0627 \u0628\u0647 \u0622\u0646 \u067e\u0627\u0633 \u062f\u0647\u06cc\u0645.<\/p>\n<p>\u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u0628\u0631\u0627\u06cc \u0686\u0627\u067e \u0645\u062d\u062a\u0648\u06cc\u0627\u062a \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u062a\u0627\u0632\u0647 \u0627\u06cc\u062c\u0627\u062f \u0634\u062f\u0647\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0634\u06cc\u0621 <code>token2id<\/code> \u06a9\u0644\u0627\u0633 <code>Dictionary<\/code> \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645.  \u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">The dictionary has: 46 tokens\n(AI),                    0\nAI             
          1\nComputer                 2\nIn                       3\nachieving                4\nactions                  5\nagents:                  6\nand                      7\nanimals.                 8\nany                      9\nartificial              10\nas                      11\nby                      12\ncalled                  13\nchance                  14\ncomputer                15\ncontrast                16\ndefines                 17\ndemonstrated            18\ndevice                  19\ndisplayed               20\nenvironment             21\ngoals.                  22\nhumans                  23\nin                      24\nintelligence            25\nintelligence,           26\nintelligent             27\nis                      28\nits                     29\nmachine                 30\nmachines,               31\nmaximize                32\nnatural                 33\nof                      34\nperceives               35\nresearch                36\nscience                 37\nscience,                38\nsometimes               39\nstudy                   40\nsuccessfully            41\ntakes                   42\nthat                    43\nthe                     44\nto                      45\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0645\u0646\u062d\u0635\u0631 \u0628\u0647 \u0641\u0631\u062f \u062f\u0631 \u0645\u062a\u0646 \u0645\u0627 \u0631\u0627 \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 \u0634\u0646\u0627\u0633\u0647 \u0639\u062f\u062f\u06cc \u06a9\u0647 \u06a9\u0644\u0645\u0647 \u0628\u0647 \u0622\u0646 \u0627\u062e\u062a\u0635\u0627\u0635 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0631\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f.  
\u06a9\u0644\u0645\u0647 \u06cc\u0627 \u0646\u0634\u0627\u0646\u0647 \u06a9\u0644\u06cc\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0648 \u0634\u0646\u0627\u0633\u0647 \u0645\u0642\u062f\u0627\u0631 \u0627\u0633\u062a.  \u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0634\u0646\u0627\u0633\u0647 \u0627\u062e\u062a\u0635\u0627\u0635 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0628\u0647 \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(gensim_dictionary.token2id[<span class=\"hljs-string\">\"study\"<\/span>])\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u06a9\u0644\u0645\u0647 &#8220;study&#8221; \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06a9\u0644\u06cc\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u062e\u0648\u062f \u067e\u0627\u0633 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  
\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u06cc\u062f \u0645\u0642\u062f\u0627\u0631 \u0645\u0631\u0628\u0648\u0637\u0647 \u06cc\u0639\u0646\u06cc \u0634\u0646\u0627\u0633\u0647 \u06a9\u0644\u0645\u0647 &#8220;study&#8221; \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 40 \u0627\u0633\u062a.<\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0645\u0634\u0627\u0628\u0647\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0628\u0631\u0627\u06cc \u067e\u06cc\u062f\u0627 \u06a9\u0631\u062f\u0646 \u06a9\u0644\u06cc\u062f \u06cc\u0627 \u06a9\u0644\u0645\u0647 \u0628\u0631\u0627\u06cc \u06cc\u06a9 \u0634\u0646\u0627\u0633\u0647 \u062e\u0627\u0635 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f.<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-built_in\">list<\/span>(gensim_dictionary.token2id.keys())[<span class=\"hljs-built_in\">list<\/span>(gensim_dictionary.token2id.values()).index(<span class=\"hljs-number\">40<\/span>)])\n<\/code><\/pre>\n<p>\u0628\u0631\u0627\u06cc \u0686\u0627\u067e \u062a\u0648\u06a9\u0646\u200c\u0647\u0627 \u0648 \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0622\u0646\u200c\u0647\u0627 \u0627\u0632 \u06cc\u06a9 \u062d\u0644\u0642\u0647 for \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f\u06cc\u0645.  
\u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0647 \u0637\u0648\u0631 \u0645\u0633\u062a\u0642\u06cc\u0645 print \u062a\u0648\u06a9\u0646 \u0647\u0627 \u0648 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0622\u0646\u0647\u0627 \u0628\u0627 \u0686\u0627\u067e \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(gensim_dictionary.token2id)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0635\u0648\u0631\u062a \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">{'(AI),': 0, 'AI': 1, 'Computer': 2, 'In': 3, 'achieving': 4, 'actions': 5, 'agents:': 6, 'and': 7, 'animals.': 8, 'any': 9, 'artificial': 10, 'as': 11, 'by': 12, 'called': 13, 'chance': 14, 'computer': 15, 'contrast': 16, 'defines': 17, 'demonstrated': 18, 'device': 19, 'displayed': 20, 'environment': 21, 'goals.': 22, 'humans': 23, 'in': 24, 'intelligence': 25, 'intelligence,': 26, 'intelligent': 27, 'is': 28, 'its': 29, 'machine': 30, 'machines,': 31, 'maximize': 32, 'natural': 33, 'of': 34, 'perceives': 35, 'research': 36, 'science': 37, 'science,': 38, 'sometimes': 39, 'study': 40, 'successfully': 41, 'takes': 42, 'that': 43, 'the': 44, 'to': 45}\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0628\u0647 \u0627\u0646\u062f\u0627\u0632\u0647 \u0622\u0646\u0686\u0647 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u062d\u0644\u0642\u0647 \u0686\u0627\u067e \u0634\u062f\u0647 \u0627\u0633\u062a \u0648\u0627\u0636\u062d \u0646\u0628\u0627\u0634\u062f\u060c \u0627\u06af\u0631\u0686\u0647 \u0647\u0646\u0648\u0632 \u0647\u0645 \u0628\u0647 
\u0647\u062f\u0641 \u062e\u0648\u062f \u0639\u0645\u0644 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062d\u0627\u0644 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u0645 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06cc\u06a9 \u0633\u0646\u062f \u062c\u062f\u06cc\u062f\u060c \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0628\u06cc\u0634\u062a\u0631\u06cc \u0631\u0627 \u0628\u0647 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0645\u0648\u062c\u0648\u062f \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u0645.  \u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">text = [<span class=\"hljs-string\">\"\"\"Colloquially, the term \"artificial intelligence\" is used to\n           describe machines that mimic \"cognitive\" functions that humans\n           associate with other human minds, such as \"learning\" and \"problem solving\"\"\"<\/span>]\n\ntokens = [[token <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> sentence.split()] <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> text]\ngensim_dictionary.add_documents(tokens)\n\n<span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">\"The dictionary has: \"<\/span> + <span class=\"hljs-built_in\">str<\/span>(<span class=\"hljs-built_in\">len<\/span>(gensim_dictionary)) + <span class=\"hljs-string\">\" tokens\"<\/span>)\n<span class=\"hljs-built_in\">print<\/span>(gensim_dictionary.token2id)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u06cc\u06a9 \u0633\u0646\u062f \u062c\u062f\u06cc\u062f \u062f\u0627\u0631\u06cc\u0645 \u06a9\u0647 \u0634\u0627\u0645\u0644 \u0642\u0633\u0645\u062a \u062f\u0648\u0645 
\u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0627\u0648\u0644 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0627\u0633\u062a \u0631\u0648\u06cc \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc.  \u0645\u0627 \u0645\u062a\u0646 \u0631\u0627 \u0628\u0647 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u062a\u0642\u0633\u06cc\u0645 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0633\u067e\u0633 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u0622\u0646 \u0631\u0627 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>add_documents<\/code> \u0631\u0648\u0634\u06cc \u0628\u0631\u0627\u06cc \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u0628\u0647 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0645\u0648\u062c\u0648\u062f \u0645\u0627.  \u0628\u0627\u0644\u0627\u062e\u0631\u0647 \u0645\u0627 print \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0628\u0647 \u0631\u0648\u0632 \u0634\u062f\u0647 \u0631\u0648\u06cc \u0631\u0627 console.<\/p>\n<p>\u062e\u0631\u0648\u062c\u06cc \u06a9\u062f \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">The dictionary has: 65 tokens\n{'(AI),': 0, 'AI': 1, 'Computer': 2, 'In': 3, 'achieving': 4, 'actions': 5, 'agents:': 6, 'and': 7, 'animals.': 8, 'any': 9, 'artificial': 10, 'as': 11, 'by': 12, 'called': 13, 'chance': 14, 'computer': 15, 'contrast': 16, 'defines': 17, 'demonstrated': 18, 'device': 19, 'displayed': 20, 'environment': 21, 'goals.': 22, 'humans': 23, 'in': 24, 'intelligence': 25, 'intelligence,': 26, 'intelligent': 27, 'is': 28, 'its': 29, 'machine': 30, 'machines,': 31, 'maximize': 32, 'natural': 33, 'of': 34, 'perceives': 35, 'research': 36, 'science': 37, 'science,': 38, 'sometimes': 39, 'study': 40, 'successfully': 41, 'takes': 42, 'that': 43, 'the': 44, 'to': 45, '\"artificial': 46, '\"cognitive\"': 47, '\"learning\"': 48, 
'\"problem': 49, 'Colloquially,': 50, 'associate': 51, 'describe': 52, 'functions': 53, 'human': 54, 'intelligence\"': 55, 'machines': 56, 'mimic': 57, 'minds,': 58, 'other': 59, 'solving': 60, 'such': 61, 'term': 62, 'used': 63, 'with': 64}\n<\/code><\/pre>\n<p>\u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u0627\u06a9\u0646\u0648\u0646 65 \u062a\u0648\u06a9\u0646 \u062f\u0631 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u062e\u0648\u062f \u062f\u0627\u0631\u06cc\u0645\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0642\u0628\u0644\u0627\u064b 45 \u062a\u0648\u06a9\u0646 \u062f\u0627\u0634\u062a\u06cc\u0645.<\/p>\n<h3 id=\"creatingdictionariesusingtextfiles\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%81%d8%b1%d9%87%d9%86%da%af_%d9%84%d8%ba%d8%aa_%d8%a8%d8%a7_%d8%a7%d8%b3%d8%aa%d9%81%d8%a7%d8%af%d9%87_%d8%a7%d8%b2_%d9%81%d8%a7%db%8c%d9%84_%d9%87%d8%a7%db%8c_%d9%85%d8%aa%d9%86%db%8c\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062f\u0631 \u0642\u0633\u0645\u062a \u0642\u0628\u0644\u060c \u0645\u062a\u0646 \u062f\u0631\u0648\u0646 \u062d\u0627\u0641\u0638\u0647 \u062f\u0627\u0634\u062a\u06cc\u0645.  
\u0627\u06af\u0631 \u0628\u062e\u0648\u0627\u0647\u06cc\u0645 \u0628\u0627 \u062e\u0648\u0627\u0646\u062f\u0646 \u06cc\u06a9 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc \u0627\u0632 \u0631\u0648\u06cc \u0647\u0627\u0631\u062f \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0628\u0633\u0627\u0632\u06cc\u0645 \u0686\u0637\u0648\u0631\u061f  \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0631\u0648\u0634 <code>simple_preprocess<\/code> \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 <code>gensim.utils<\/code> \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645.  \u0645\u0632\u06cc\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u06cc\u0646 \u0631\u0648\u0634 \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc \u0631\u0627 \u062e\u0637 \u0628\u0647 \u062e\u0637 \u0645\u06cc \u062e\u0648\u0627\u0646\u062f \u0648 \u062a\u0648\u06a9\u0646 \u0647\u0627\u06cc \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u062e\u0637 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.  
\u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0646\u06cc\u0627\u0632\u06cc \u0646\u06cc\u0633\u062a \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc \u06a9\u0627\u0645\u0644 \u0631\u0627 \u062f\u0631 \u062d\u0627\u0641\u0638\u0647 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0642\u0628\u0644 \u0627\u0632 \u0627\u062c\u0631\u0627\u06cc \u0645\u062b\u0627\u0644 \u0628\u0639\u062f\u06cc\u060c \u06cc\u06a9 \u0641\u0627\u06cc\u0644 &#8220;file1.txt&#8221; \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0648 \u0645\u062a\u0646 \u0632\u06cc\u0631 \u0631\u0627 \u0628\u0647 \u0641\u0627\u06cc\u0644 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f (\u0627\u06cc\u0646 \u0646\u06cc\u0645\u0647 \u0627\u0648\u0644 \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0627\u0648\u0644 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u062f\u0631\u0628\u0627\u0631\u0647 \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc \u0627\u0633\u062a).<\/p>\n<pre><code class=\"hljs\">Global warming is a long-term rise in the average temperature of the Earth's climate system, an aspect of climate change shown by temperature measurements and by multiple effects of the warming. 
Though earlier geological periods also experienced episodes of warming, the term commonly refers to the observed and continuing increase in average air and ocean temperatures since 1900 caused mainly by emissions of greenhouse gasses in the modern industrial economy.\n<\/code><\/pre>\n<p>\u062d\u0627\u0644 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u06cc\u06a9 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u062d\u0627\u0648\u06cc \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc\u06cc \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc &#8220;file1.txt&#8221; \u0628\u0627\u0634\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.utils <span class=\"hljs-keyword\">import<\/span> simple_preprocess\n<span class=\"hljs-keyword\">from<\/span> smart_open <span class=\"hljs-keyword\">import<\/span> smart_open\n<span class=\"hljs-keyword\">import<\/span> os\n\ngensim_dictionary = corpora.Dictionary(simple_preprocess(sentence, deacc=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">open<\/span>(<span class=\"hljs-string\">r'E:\\\\text files\\\\file1.txt'<\/span>, encoding=<span class=\"hljs-string\">'utf-8'<\/span>))\n\n<span class=\"hljs-built_in\">print<\/span>(gensim_dictionary.token2id)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc &#8220;file1.txt&#8221; \u0631\u0627 \u062e\u0637 \u0628\u0647 \u062e\u0637 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <code>simple_preprocess<\/code> \u0631\u0648\u0634.  
\u0627\u06cc\u0646 \u0631\u0648\u0634 \u062a\u0648\u06a9\u0646 \u0647\u0627 \u0631\u0627 \u062f\u0631 \u0647\u0631 \u062e\u0637 \u0627\u0632 \u0633\u0646\u062f \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.  \u0633\u067e\u0633 \u0627\u0632 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.  \u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u06cc\u062f \u062a\u0648\u06a9\u0646 \u0647\u0627 \u0648 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0622\u0646 \u0647\u0627 \u0631\u0627 \u0645\u0627\u0646\u0646\u062f \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">{'average': 0, 'climate': 1, 'earth': 2, 'global': 3, 'in': 4, 'is': 5, 'long': 6, 'of': 7, 'rise': 8, 'system': 9, 'temperature': 10, 'term': 11, 'the': 12, 'warming': 13, 'an': 14, 'and': 15, 'aspect': 16, 'by': 17, 'change': 18, 'effects': 19, 'measurements': 20, 'multiple': 21, 'shown': 22, 'also': 23, 'earlier': 24, 'episodes': 25, 'experienced': 26, 'geological': 27, 'periods': 28, 'though': 29, 'air': 30, 'commonly': 31, 'continuing': 32, 'increase': 33, 'observed': 34, 'ocean': 35, 'refers': 36, 'temperatures': 37, 'to': 38, 'caused': 39, 'economy': 40, 'emissions': 41, 'gasses': 42, 'greenhouse': 43, 'industrial': 44, 'mainly': 45, 'modern': 46, 'since': 47}\n<\/code><\/pre>\n<p>\u0628\u0647 \u0647\u0645\u06cc\u0646 \u062a\u0631\u062a\u06cc\u0628\u060c \u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0627 \u062e\u0648\u0627\u0646\u062f\u0646 \u0686\u0646\u062f\u06cc\u0646 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc \u06cc\u06a9 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645.  
\u0641\u0627\u06cc\u0644 \u062f\u06cc\u06af\u0631\u06cc \u0628\u0647 \u0646\u0627\u0645 &#8220;file2.txt&#8221; \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0648 \u0645\u062a\u0646 \u0632\u06cc\u0631 \u0631\u0627 \u0628\u0647 \u0641\u0627\u06cc\u0644 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f (\u0628\u062e\u0634 \u062f\u0648\u0645 \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0627\u0648\u0644 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0631\u0648\u06cc \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc):<\/p>\n<pre><code class=\"hljs\">In the modern context the terms global warming and climate change are commonly used interchangeably, but climate change includes both global warming and its effects, such as changes to precipitation and impacts that differ by region.(7)(8) Many of the observed warming changes since the 1950s are unprecedented in the instrumental temperature record, and in historical and paleoclimate proxy records of climate change over thousands to millions of years.\n<\/code><\/pre>\n<p>&#8220;file2.txt&#8221; \u0631\u0627 \u062f\u0631 \u0647\u0645\u0627\u0646 \u062f\u0627\u06cc\u0631\u06a9\u062a\u0648\u0631\u06cc &#8220;file1.txt&#8221; \u0630\u062e\u06cc\u0631\u0647 \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0647\u0631 \u062f\u0648 \u0641\u0627\u06cc\u0644 \u0631\u0627 \u0645\u06cc \u062e\u0648\u0627\u0646\u062f \u0648 \u0633\u067e\u0633 \u06cc\u06a9 \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u062f \u0631\u0648\u06cc \u0645\u062a\u0646 \u062f\u0631 \u062f\u0648 \u0641\u0627\u06cc\u0644:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.utils <span class=\"hljs-keyword\">import<\/span> simple_preprocess\n<span class=\"hljs-keyword\">from<\/span> smart_open <span 
class=\"hljs-keyword\">import<\/span> smart_open\n<span class=\"hljs-keyword\">import<\/span> os\n\n<span class=\"hljs-class\"><span class=\"hljs-keyword\">class<\/span> <span class=\"hljs-title\">ReturnTokens<\/span>(<span class=\"hljs-params\"><span class=\"hljs-built_in\">object<\/span><\/span>):<\/span>\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__init__<\/span>(<span class=\"hljs-params\">self, dir_path<\/span>):<\/span>\n        self.dir_path = dir_path\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__iter__<\/span>(<span class=\"hljs-params\">self<\/span>):<\/span>\n        <span class=\"hljs-keyword\">for<\/span> file_name <span class=\"hljs-keyword\">in<\/span> os.listdir(self.dir_path):\n            <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">open<\/span>(os.path.join(self.dir_path, file_name), encoding=<span class=\"hljs-string\">'utf-8'<\/span>):\n                <span class=\"hljs-keyword\">yield<\/span> simple_preprocess(sentence)\n\npath_to_text_directory = <span class=\"hljs-string\">r\"E:\\text files\"<\/span>\ngensim_dictionary = corpora.Dictionary(ReturnTokens(path_to_text_directory))\n\n<span class=\"hljs-built_in\">print<\/span>(gensim_dictionary.token2id)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u06cc\u06a9 \u0645\u062a\u062f \u062f\u0627\u0631\u06cc\u0645 <code>ReturnTokens<\/code>\u060c \u06a9\u0647 \u0645\u0633\u06cc\u0631 \u062f\u0627\u06cc\u0631\u06a9\u062a\u0648\u0631\u06cc \u062d\u0627\u0648\u06cc &#8220;file1.txt&#8221; \u0648 &#8220;file2.txt&#8221; \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u062a\u0646\u0647\u0627 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u06cc \u06af\u06cc\u0631\u062f.  
\u062f\u0631 \u062f\u0627\u062e\u0644 \u0645\u062a\u062f\u060c \u062a\u0645\u0627\u0645 \u0641\u0627\u06cc\u0644\u200c\u0647\u0627\u06cc \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u062f\u0627\u06cc\u0631\u06a9\u062a\u0648\u0631\u06cc \u0631\u0627 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u0645 \u0648 \u0633\u067e\u0633 \u0647\u0631 \u0641\u0627\u06cc\u0644 \u0631\u0627 \u062e\u0637 \u0628\u0647 \u062e\u0637 \u0645\u06cc\u200c\u062e\u0648\u0627\u0646\u06cc\u0645.  \u0645\u062a\u062f <code>simple_preprocess<\/code> \u0628\u0631\u0627\u06cc \u0647\u0631 \u062e\u0637 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc\u06cc \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u062f.  \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc \u0647\u0631 \u062e\u0637 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u0644\u0645\u0647 \u06a9\u0644\u06cc\u062f\u06cc &#8220;yield&#8221; \u0628\u0647 \u062a\u0627\u0628\u0639 \u0641\u0631\u0627\u062e\u0648\u0627\u0646 \u0628\u0627\u0632\u06af\u0631\u062f\u0627\u0646\u062f\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u06cc\u062f \u062a\u0648\u06a9\u0646 \u0647\u0627\u06cc \u0632\u06cc\u0631 \u0631\u0627 \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 \u0634\u0646\u0627\u0633\u0647 \u0622\u0646\u0647\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">{'average': 0, 'climate': 1, 'earth': 2, 'global': 3, 'in': 4, 'is': 5, 'long': 6, 'of': 7, 'rise': 8, 'system': 9, 'temperature': 10, 'term': 11, 'the': 12, 'warming': 13, 'an': 14, 'and': 15, 'aspect': 16, 'by': 17, 'change': 18, 'effects': 19, 'measurements': 20, 'multiple': 21, 'shown': 22, 'also': 23, 'earlier': 24, 'episodes': 25, 'experienced': 26, 'geological': 27, 'periods': 28, 'though': 29, 'air': 30, 'commonly': 31, 'continuing': 32, 'increase': 33, 'observed': 34, 
'ocean': 35, 'refers': 36, 'temperatures': 37, 'to': 38, 'caused': 39, 'economy': 40, 'emissions': 41, 'gasses': 42, 'greenhouse': 43, 'industrial': 44, 'mainly': 45, 'modern': 46, 'since': 47, 'are': 48, 'context': 49, 'interchangeably': 50, 'terms': 51, 'used': 52, 'as': 53, 'both': 54, 'but': 55, 'changes': 56, 'includes': 57, 'its': 58, 'precipitation': 59, 'such': 60, 'differ': 61, 'impacts': 62, 'instrumental': 63, 'many': 64, 'record': 65, 'region': 66, 'that': 67, 'unprecedented': 68, 'historical': 69, 'millions': 70, 'over': 71, 'paleoclimate': 72, 'proxy': 73, 'records': 74, 'thousands': 75, 'years': 76}\n<\/code><\/pre>\n<h2 id=\"creatingbagofwordscorpus\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0634\u0627\u0645\u0644 \u0646\u06af\u0627\u0634\u062a \u0628\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0648 \u0645\u0642\u0627\u062f\u06cc\u0631 \u0639\u062f\u062f\u06cc \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0622\u0646\u0647\u0627\u0633\u062a.  
\u0645\u062c\u0645\u0648\u0639\u0647\u200c\u0647\u0627\u06cc \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u062f\u0631 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u0631\u067e\u0627 \u0634\u062f\u0647\u200c\u0627\u0646\u062f \u0631\u0648\u06cc \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0648 \u062d\u0627\u0648\u06cc \u0634\u0646\u0627\u0633\u0647 \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 \u062f\u0641\u0639\u0627\u062a \u0648\u0642\u0648\u0639 \u06a9\u0644\u0645\u0647 \u0627\u0633\u062a.<\/p>\n<h3 id=\"creatingbagofwordscorpusfrominmemoryobjects\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%da%a9%db%8c%d8%b3%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%a7%d8%b2_%d8%a7%d8%b4%db%8c%d8%a7%d8%a1_%d8%af%d8%b1%d9%88%d9%86_%d8%ad%d8%a7%d9%81%d8%b8%d9%87\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0627\u0634\u06cc\u0627\u0621 \u062f\u0631\u0648\u0646 \u062d\u0627\u0641\u0638\u0647<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim\n<span class=\"hljs-keyword\">from<\/span> gensim <span class=\"hljs-keyword\">import<\/span> corpora\n<span class=\"hljs-keyword\">from<\/span> pprint <span class=\"hljs-keyword\">import<\/span> pprint\n\ntext = (<span class=\"hljs-string\">\"\"\"In computer science, artificial intelligence (AI),\n           sometimes called machine intelligence, is intelligence\n           demonstrated by machines, in contrast to the natural intelligence\n           displayed by humans and animals. 
Computer science defines\n           AI research as the study of intelligent agents: any device that\n           perceives its environment and takes actions that maximize its chance\n           of successfully achieving its goals.\"\"\"<\/span>)\n\ntokens = ((token <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> sentence.split()) <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> text)\n\ngensim_dictionary = corpora.Dictionary()\ngensim_corpus = (gensim_dictionary.doc2bow(token, allow_update=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> tokens)\n\n<span class=\"hljs-built_in\">print<\/span>(gensim_corpus)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u062a\u0646\u06cc \u062f\u0627\u0631\u06cc\u0645 \u06a9\u0647 \u0628\u0647 \u062a\u0648\u06a9\u0646 \u0647\u0627 \u062a\u0642\u0633\u06cc\u0645 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u0628\u0639\u062f\u060c a \u0631\u0627 \u0645\u0642\u062f\u0627\u0631\u062f\u0647\u06cc \u0627\u0648\u0644\u06cc\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>Dictionary<\/code> \u0634\u06cc \u0627\u0632 <code>corpora<\/code> \u0645\u062f\u0648\u0644.  
\u0634\u06cc \u0634\u0627\u0645\u0644 \u06cc\u06a9 \u0645\u062a\u062f \u0627\u0633\u062a <code>doc2bow<\/code>\u060c \u06a9\u0647 \u0627\u0633\u0627\u0633\u0627\u064b \u062f\u0648 \u0648\u0638\u06cc\u0641\u0647 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<ul>\n<li>\u062f\u0631 \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u0645\u062a\u0646 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u0634\u0648\u062f\u060c \u0627\u06af\u0631 \u06a9\u0644\u0645\u0647 \u0627\u0632 \u0642\u0628\u0644 \u062f\u0631 \u0628\u062f\u0646\u0647 \u0648\u062c\u0648\u062f \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f\u060c \u062a\u0639\u062f\u0627\u062f \u062f\u0641\u0639\u0627\u062a \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u062f\u0647\u062f.<\/li>\n<li>\u062f\u0631 \u063a\u06cc\u0631 \u0627\u06cc\u0646 \u0635\u0648\u0631\u062a \u06a9\u0644\u0645\u0647 \u0631\u0627 \u0648\u0627\u0631\u062f \u0628\u062f\u0646\u0647 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u062a\u0639\u062f\u0627\u062f \u0641\u0631\u06a9\u0627\u0646\u0633 \u0622\u0646 \u0631\u0627 \u0631\u0648\u06cc 1 \u0642\u0631\u0627\u0631 \u0645\u06cc \u062f\u0647\u062f<\/li>\n<\/ul>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">(((0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 2), (8, 1), (9, 1), (10, 1), (11, 1), (12, 2), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1), (20, 1), (21, 1), (22, 1), (23, 1), (24, 1), (25, 3), (26, 1), (27, 1), (28, 1), (29, 3), (30, 1), (31, 1), (32, 1), (33, 1), (34, 2), (35, 1), (36, 1), (37, 1), (38, 1), (39, 1), (40, 1), (41, 1), (42, 1), (43, 2), (44, 2), (45, 1)))\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0628\u0631\u0627\u06cc \u0634\u0645\u0627 
\u0645\u0646\u0637\u0642\u06cc \u0646\u0628\u0627\u0634\u062f.  \u0627\u062c\u0627\u0632\u0647 \u062f\u0647\u06cc\u062f \u062a\u0648\u0636\u06cc\u062d \u062f\u0647\u0645.  \u062a\u0627\u067e\u0644 \u0627\u0648\u0644 (0\u060c1) \u0627\u0633\u0627\u0633\u0627\u064b \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u06a9\u0644\u0645\u0647 \u0628\u0627 \u0634\u0646\u0627\u0633\u0647 0 \u06cc\u06a9 \u0628\u0627\u0631 \u062f\u0631 \u0645\u062a\u0646 \u0631\u062e \u062f\u0627\u062f\u0647 \u0627\u0633\u062a.  \u0628\u0647 \u0647\u0645\u06cc\u0646 \u062a\u0631\u062a\u06cc\u0628\u060c (25\u060c 3) \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u06a9\u0644\u0645\u0647 \u0628\u0627 \u0634\u0646\u0627\u0633\u0647 25 \u0633\u0647 \u0628\u0627\u0631 \u062f\u0631 \u0633\u0646\u062f \u0622\u0645\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u062d\u0627\u0644\u0627 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0628\u0631\u0627\u06cc \u0631\u0648\u0634\u0646 \u0634\u062f\u0646 \u0645\u0648\u0636\u0648\u0639\u060c \u06a9\u0644\u0645\u0647 \u0648 \u0641\u0631\u06a9\u0627\u0646\u0633 \u0622\u0646 \u0631\u0627 \u0686\u0627\u067e \u06a9\u0646\u06cc\u0645.  
\u062e\u0637\u0648\u0637 \u06a9\u062f \u0632\u06cc\u0631 \u0631\u0627 \u062f\u0631 \u0627\u0646\u062a\u0647\u0627\u06cc \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0642\u0628\u0644\u06cc \u0627\u0636\u0627\u0641\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">word_frequencies = [[(gensim_dictionary[<span class=\"hljs-built_in\">id<\/span>], frequence) <span class=\"hljs-keyword\">for<\/span> <span class=\"hljs-built_in\">id<\/span>, frequence <span class=\"hljs-keyword\">in<\/span> couple] <span class=\"hljs-keyword\">for<\/span> couple <span class=\"hljs-keyword\">in<\/span> gensim_corpus]\n<span class=\"hljs-built_in\">print<\/span>(word_frequencies)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">[[('(AI),', 1), ('AI', 1), ('Computer', 1), ('In', 1), ('achieving', 1), ('actions', 1), ('agents:', 1), ('and', 2), ('animals.', 1), ('any', 1), ('artificial', 1), ('as', 1), ('by', 2), ('called', 1), ('chance', 1), ('computer', 1), ('contrast', 1), ('defines', 1), ('demonstrated', 1), ('device', 1), ('displayed', 1), ('environment', 1), ('goals.', 1), ('humans', 1), ('in', 1), ('intelligence', 3), ('intelligence,', 1), ('intelligent', 1), ('is', 1), ('its', 3), ('machine', 1), ('machines,', 1), ('maximize', 1), ('natural', 1), ('of', 2), ('perceives', 1), ('research', 1), ('science', 1), ('science,', 1), ('sometimes', 1), ('study', 1), ('successfully', 1), ('takes', 1), ('that', 2), ('the', 2), ('to', 1)]]\n<\/code><\/pre>\n<p>\u0627\u0632 \u062e\u0631\u0648\u062c\u06cc \u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0647 &#8220;intelligence&#8221; \u0633\u0647 \u0628\u0627\u0631 \u0638\u0627\u0647\u0631 \u0645\u06cc \u0634\u0648\u062f.  
\u0628\u0647 \u0647\u0645\u06cc\u0646 \u062a\u0631\u062a\u06cc\u0628\u060c \u06a9\u0644\u0645\u0647 &#8220;that&#8221; \u062f\u0648 \u0628\u0627\u0631 \u0638\u0627\u0647\u0631 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<h3 id=\"creatingbagofwordscorpusfromtextfiles\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%da%a9%db%8c%d8%b3%d9%87_%da%a9%d9%84%d9%85%d8%a7%d8%aa_%d8%a7%d8%b2_%d9%81%d8%a7%db%8c%d9%84_%d9%87%d8%a7%db%8c_%d9%85%d8%aa%d9%86%db%8c\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0645\u0627\u0646\u0646\u062f \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0647\u0627\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0627 \u062e\u0648\u0627\u0646\u062f\u0646 \u06cc\u06a9 \u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc \u06cc\u06a9 \u067e\u06cc\u06a9\u0631\u0647 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Bag-of-words_model\">\u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a<\/a> \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645.  
\u0628\u0647 \u06a9\u062f \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.utils <span class=\"hljs-keyword\">import<\/span> simple_preprocess\n<span class=\"hljs-keyword\">from<\/span> smart_open <span class=\"hljs-keyword\">import<\/span> smart_open\n<span class=\"hljs-keyword\">import<\/span> os\n\ntokens = [simple_preprocess(sentence, deacc=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">open<\/span>(<span class=\"hljs-string\">r'E:\\text files\\file1.txt'<\/span>, encoding=<span class=\"hljs-string\">'utf-8'<\/span>)]\n\ngensim_dictionary = corpora.Dictionary()\ngensim_corpus = [gensim_dictionary.doc2bow(token, allow_update=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> tokens]\nword_frequencies = [[(gensim_dictionary[<span class=\"hljs-built_in\">id<\/span>], frequence) <span class=\"hljs-keyword\">for<\/span> <span class=\"hljs-built_in\">id<\/span>, frequence <span class=\"hljs-keyword\">in<\/span> couple] <span class=\"hljs-keyword\">for<\/span> couple <span class=\"hljs-keyword\">in<\/span> gensim_corpus]\n\n<span class=\"hljs-built_in\">print<\/span>(word_frequencies)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <code>file1.txt<\/code> \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645.  
\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u06cc\u062f \u06a9\u0644\u0645\u0627\u062a \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0627\u0648\u0644 \u0645\u0642\u0627\u0644\u0647 \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f \u0631\u0648\u06cc \u0648\u06cc\u06a9\u06cc\u067e\u062f\u06cc\u0627.<\/p>\n<pre><code class=\"hljs\">((('average', 1), ('climate', 1), ('earth', 1), ('global', 1), ('in', 1), ('is', 1), ('long', 1), ('of', 1), ('rise', 1), ('system', 1), ('temperature', 1), ('term', 1), ('the', 2), ('warming', 1)), (('climate', 1), ('of', 2), ('temperature', 1), ('the', 1), ('warming', 1), ('an', 1), ('and', 1), ('aspect', 1), ('by', 2), ('change', 1), ('effects', 1), ('measurements', 1), ('multiple', 1), ('shown', 1)), (('of', 1), ('warming', 1), ('also', 1), ('earlier', 1), ('episodes', 1), ('experienced', 1), ('geological', 1), ('periods', 1), ('though', 1)), (('average', 1), ('in', 1), ('term', 1), ('the', 2), ('and', 2), ('air', 1), ('commonly', 1), ('continuing', 1), ('increase', 1), ('observed', 1), ('ocean', 1), ('refers', 1), ('temperatures', 1), ('to', 1)), (('in', 1), ('of', 1), ('the', 1), ('by', 1), ('caused', 1), ('economy', 1), ('emissions', 1), ('gasses', 1), ('greenhouse', 1), ('industrial', 1), ('mainly', 1), ('modern', 1), ('since', 1)))\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a\u06cc \u0645\u0627\u0646\u0646\u062f &#8220;of&#8221;\u060c &#8220;the&#8221;\u060c &#8220;by&#8221; \u0648 &#8220;and&#8221; \u062f\u0648 \u0628\u0627\u0631 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u0634\u0648\u0646\u062f.<\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0645\u0634\u0627\u0628\u0647\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0686\u0646\u062f\u06cc\u0646 
\u0641\u0627\u06cc\u0644 \u0645\u062a\u0646\u06cc\u060c \u0645\u0627\u0646\u0646\u062f \u0634\u06a9\u0644 \u0632\u06cc\u0631\u060c \u0645\u062c\u0645\u0648\u0639\u0647\u200c\u0627\u06cc \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.utils <span class=\"hljs-keyword\">import<\/span> simple_preprocess\n<span class=\"hljs-keyword\">from<\/span> smart_open <span class=\"hljs-keyword\">import<\/span> smart_open\n<span class=\"hljs-keyword\">import<\/span> os\n\n<span class=\"hljs-class\"><span class=\"hljs-keyword\">class<\/span> <span class=\"hljs-title\">ReturnTokens<\/span>(<span class=\"hljs-params\"><span class=\"hljs-built_in\">object<\/span><\/span>):<\/span>\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__init__<\/span>(<span class=\"hljs-params\">self, dir_path<\/span>):<\/span>\n        self.dir_path = dir_path\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__iter__<\/span>(<span class=\"hljs-params\">self<\/span>):<\/span>\n        <span class=\"hljs-keyword\">for<\/span> file_name <span class=\"hljs-keyword\">in<\/span> os.listdir(self.dir_path):\n            <span class=\"hljs-keyword\">for<\/span> sentence <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">open<\/span>(os.path.join(self.dir_path, file_name), encoding=<span class=\"hljs-string\">'utf-8'<\/span>):\n                <span class=\"hljs-keyword\">yield<\/span> simple_preprocess(sentence)\n\npath_to_text_directory = <span class=\"hljs-string\">r\"E:\\text files\"<\/span>\n\ngensim_dictionary = corpora.Dictionary()\ngensim_corpus = (gensim_dictionary.doc2bow(token, allow_update=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> token <span 
class=\"hljs-keyword\">in<\/span> ReturnTokens(path_to_text_directory))\nword_frequencies = (((gensim_dictionary(<span class=\"hljs-built_in\">id<\/span>), frequence) <span class=\"hljs-keyword\">for<\/span> <span class=\"hljs-built_in\">id<\/span>, frequence <span class=\"hljs-keyword\">in<\/span> couple) <span class=\"hljs-keyword\">for<\/span> couple <span class=\"hljs-keyword\">in<\/span> gensim_corpus)\n\n<span class=\"hljs-built_in\">print<\/span>(word_frequencies)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">((('average', 1), ('climate', 1), ('earth', 1), ('global', 1), ('in', 1), ('is', 1), ('long', 1), ('of', 1), ('rise', 1), ('system', 1), ('temperature', 1), ('term', 1), ('the', 2), ('warming', 1)), (('climate', 1), ('of', 2), ('temperature', 1), ('the', 1), ('warming', 1), ('an', 1), ('and', 1), ('aspect', 1), ('by', 2), ('change', 1), ('effects', 1), ('measurements', 1), ('multiple', 1), ('shown', 1)), (('of', 1), ('warming', 1), ('also', 1), ('earlier', 1), ('episodes', 1), ('experienced', 1), ('geological', 1), ('periods', 1), ('though', 1)), (('average', 1), ('in', 1), ('term', 1), ('the', 2), ('and', 2), ('air', 1), ('commonly', 1), ('continuing', 1), ('increase', 1), ('observed', 1), ('ocean', 1), ('refers', 1), ('temperatures', 1), ('to', 1)), (('in', 1), ('of', 1), ('the', 1), ('by', 1), ('caused', 1), ('economy', 1), ('emissions', 1), ('gasses', 1), ('greenhouse', 1), ('industrial', 1), ('mainly', 1), ('modern', 1), ('since', 1)), (('climate', 1), ('global', 1), ('in', 1), ('the', 2), ('warming', 1), ('and', 1), ('change', 1), ('commonly', 1), ('modern', 1), ('are', 1), ('context', 1), ('interchangeably', 1), ('terms', 1), ('used', 1)), (('climate', 1), ('global', 1), ('warming', 1), ('and', 2), ('change', 1), ('effects', 1), ('to', 1), ('as', 1), ('both', 1), ('but', 
1), ('changes', 1), ('includes', 1), ('its', 1), ('precipitation', 1), ('such', 1)), (('in', 1), ('of', 1), ('temperature', 1), ('the', 3), ('warming', 1), ('by', 1), ('observed', 1), ('since', 1), ('are', 1), ('changes', 1), ('differ', 1), ('impacts', 1), ('instrumental', 1), ('many', 1), ('record', 1), ('region', 1), ('that', 1), ('unprecedented', 1)), (('climate', 1), ('in', 1), ('of', 2), ('and', 2), ('change', 1), ('to', 1), ('historical', 1), ('millions', 1), ('over', 1), ('paleoclimate', 1), ('proxy', 1), ('records', 1), ('thousands', 1), ('years', 1)))\n<\/code><\/pre>\n<h2 id=\"creatingtfidfcorpus\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_tf-idf_corpus\"><\/span>\u0627\u06cc\u062c\u0627\u062f TF-IDF Corpus<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0631\u0648\u06cc\u06a9\u0631\u062f \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0628\u0631\u0627\u06cc \u062a\u0628\u062f\u06cc\u0644 \u0645\u062a\u0646 \u0628\u0647 \u0627\u0639\u062f\u0627\u062f \u062e\u0648\u0628 \u0639\u0645\u0644 \u0645\u06cc \u06a9\u0646\u062f.  \u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u06cc\u06a9 \u0627\u06cc\u0631\u0627\u062f \u062f\u0627\u0631\u062f.  \u0628\u0631 \u0627\u0633\u0627\u0633 \u06a9\u0644\u0645\u0647 \u0628\u0647 \u06cc\u06a9 \u0646\u0645\u0631\u0647 \u0627\u0645\u062a\u06cc\u0627\u0632 \u0645\u06cc \u062f\u0647\u062f \u0631\u0648\u06cc \u0648\u0642\u0648\u0639 \u0622\u0646 \u062f\u0631 \u06cc\u06a9 \u0633\u0646\u062f \u062e\u0627\u0635  \u0627\u06cc\u0646 \u0648\u0627\u0642\u0639\u06cc\u062a \u0631\u0627 \u062f\u0631 \u0646\u0638\u0631 \u0646\u0645\u06cc \u06af\u06cc\u0631\u062f \u06a9\u0647 \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u0627\u0633\u0646\u0627\u062f \u062f\u06cc\u06af\u0631 \u0646\u06cc\u0632 \u0641\u0631\u0627\u0648\u0627\u0646\u06cc \u0641\u0631\u0627\u0648\u0627\u0646\u06cc \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f. 
<a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Tf%E2%80%93idf\">TF-IDF<\/a> \u0627\u06cc\u0646 \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u062d\u0644 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u0627\u0635\u0637\u0644\u0627\u062d \u0641\u0631\u06a9\u0627\u0646\u0633 \u0628\u0647 \u0635\u0648\u0631\u062a \u0632\u06cc\u0631 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u0634\u0648\u062f:<\/p>\n<pre><code class=\"hljs\">Term frequency = (Frequency of the word in a document)\/(Total words in the document)\n<\/code><\/pre>\n<p>\u0648 \u0641\u0631\u06a9\u0627\u0646\u0633 \u0645\u0639\u06a9\u0648\u0633 \u0633\u0646\u062f \u0628\u0647 \u0635\u0648\u0631\u062a \u0632\u06cc\u0631 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u0634\u0648\u062f:<\/p>\n<pre><code class=\"hljs\">IDF(word) = Log((Total number of documents)\/(Number of documents containing the word))\n<\/code><\/pre>\n<p>\u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 TF-IDF \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim\n<span class=\"hljs-keyword\">from<\/span> gensim <span class=\"hljs-keyword\">import<\/span> corpora\n<span class=\"hljs-keyword\">from<\/span> pprint <span class=\"hljs-keyword\">import<\/span> pprint\n\ntext = (<span class=\"hljs-string\">\"I like to play Football\"<\/span>,\n       <span class=\"hljs-string\">\"Football is the best game\"<\/span>,\n       <span class=\"hljs-string\">\"Which game do you like to play ?\"<\/span>)\n\ntokens = ((token <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> sentence.split()) <span class=\"hljs-keyword\">for<\/span> sentence <span 
class=\"hljs-keyword\">in<\/span> text)\n\ngensim_dictionary = corpora.Dictionary()\ngensim_corpus = [gensim_dictionary.doc2bow(token, allow_update=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> tokens]\n\n<span class=\"hljs-keyword\">from<\/span> gensim <span class=\"hljs-keyword\">import<\/span> models\n<span class=\"hljs-keyword\">import<\/span> numpy <span class=\"hljs-keyword\">as<\/span> np\n\ntfidf = models.TfidfModel(gensim_corpus, smartirs=<span class=\"hljs-string\">'ntc'<\/span>)\n\n<span class=\"hljs-keyword\">for<\/span> sent <span class=\"hljs-keyword\">in<\/span> tfidf[gensim_corpus]:\n    <span class=\"hljs-built_in\">print<\/span>([[gensim_dictionary[<span class=\"hljs-built_in\">id<\/span>], np.around(frequency, decimals=<span class=\"hljs-number\">2<\/span>)] <span class=\"hljs-keyword\">for<\/span> <span class=\"hljs-built_in\">id<\/span>, frequency <span class=\"hljs-keyword\">in<\/span> sent])\n<\/code><\/pre>\n<p>\u0628\u0631\u0627\u06cc \u06cc\u0627\u0641\u062a\u0646 \u0645\u0642\u062f\u0627\u0631 TF-IDF \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u06a9\u0644\u0627\u0633 <code>TfidfModel<\/code> \u0627\u0632 \u0645\u0627\u0698\u0648\u0644 <code>models<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645.  
\u0645\u0627 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u0628\u0627\u06cc\u062f \u067e\u06cc\u06a9\u0631\u0647 \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0628\u0647 \u0633\u0627\u0632\u0646\u062f\u0647 \u06a9\u0644\u0627\u0633 <code>TfidfModel<\/code> \u067e\u0627\u0633 \u062f\u0647\u06cc\u0645.  \u062f\u0631 \u062e\u0631\u0648\u062c\u06cc\u060c \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u0633\u0647 \u062c\u0645\u0644\u0647 \u0631\u0627 \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 \u0645\u0642\u0627\u062f\u06cc\u0631 TF-IDF \u0622\u0646\u0647\u0627 \u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">[['Football', 0.3], ['I', 0.8], ['like', 0.3], ['play', 0.3], ['to', 0.3]]\n[['Football', 0.2], ['best', 0.55], ['game', 0.2], ['is', 0.55], ['the', 0.55]]\n[['like', 0.17], ['play', 0.17], ['to', 0.17], ['game', 0.17], ['?', 0.47], ['Which', 0.47], ['do', 0.47], ['you', 0.47]]\n<\/code><\/pre>\n<h2 id=\"downloadingbuiltingensimmodelsanddatasets\"><span class=\"ez-toc-section\" id=\"%d8%af%d8%a7%d9%86%d9%84%d9%88%d8%af_%d9%85%d8%af%d9%84%e2%80%8c%d9%87%d8%a7_%d9%88_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%d8%af%d8%a7%d8%af%d9%87%e2%80%8c%d9%87%d8%a7%db%8c_%d8%af%d8%a7%d8%ae%d9%84%db%8c_gensim\"><\/span>\u062f\u0627\u0646\u0644\u0648\u062f \u0645\u062f\u0644\u200c\u0647\u0627 \u0648 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u062f\u0627\u062e\u0644\u06cc Gensim<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Gensim \u0628\u0627 \u0645\u062c\u0645\u0648\u0639\u0647\u200c\u0647\u0627\u06cc \u062f\u0627\u062f\u0647 \u062f\u0627\u062e\u0644\u06cc \u0648 \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u062c\u0627\u0633\u0627\u0632\u06cc \u06a9\u0644\u0645\u0647 \u0627\u0631\u0627\u0626\u0647 
\u0645\u06cc\u200c\u0634\u0648\u062f \u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0645\u0633\u062a\u0642\u06cc\u0645\u0627\u064b \u0627\u0632 \u0622\u0646\u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0644\u0648\u062f \u06cc\u06a9 \u0645\u062f\u0644 \u062f\u0627\u062e\u0644\u06cc \u06cc\u0627 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u06a9\u0644\u0627\u0633 <code>downloader<\/code> \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 <code>gensim<\/code> \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645.  \u0633\u067e\u0633 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0644\u0648\u062f \u0628\u0633\u062a\u0647 \u0645\u0648\u0631\u062f \u0646\u0638\u0631\u060c \u0645\u062a\u062f <code>load<\/code> \u0631\u0627 \u0631\u0648\u06cc \u06a9\u0644\u0627\u0633 <code>downloader<\/code> \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc \u06a9\u0646\u06cc\u0645.  \u0628\u0647 \u06a9\u062f \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim.downloader <span class=\"hljs-keyword\">as<\/span> api\n\nw2v_embedding = api.load(<span class=\"hljs-string\">\"glove-wiki-gigaword-100\"<\/span>)\n<\/code><\/pre>\n<p>\u0628\u0627 \u062f\u0633\u062a\u0648\u0631\u0627\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u062f\u0644 \u062a\u0639\u0628\u06cc\u0647 \u06a9\u0644\u0645\u0647 &#8220;glove-wiki-gigaword-100&#8221; \u0631\u0627 \u062f\u0627\u0646\u0644\u0648\u062f \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u0627\u0633\u0627\u0633\u0627 \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0645\u062a\u0646 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0648 100 \u0628\u0639\u062f\u06cc \u0627\u0633\u062a.  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0633\u0639\u06cc \u06a9\u0646\u06cc\u0645 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0645\u062f\u0644 \u062a\u0639\u0628\u06cc\u0647 \u06a9\u0644\u0645\u0647 \u062e\u0648\u062f\u060c \u06a9\u0644\u0645\u0627\u062a \u0645\u0634\u0627\u0628\u0647 &#8220;toyota&#8221; \u0631\u0627 \u067e\u06cc\u062f\u0627 \u06a9\u0646\u06cc\u0645.  \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0627\u0632 \u06a9\u062f \u0632\u06cc\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">w2v_embedding.most_similar(<span class=\"hljs-string\">'toyota'<\/span>)\n<\/code><\/pre>\n<p>\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u0628\u0627\u06cc\u062f \u0646\u062a\u0627\u06cc\u062c \u0632\u06cc\u0631 \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">[('honda', 0.8739858865737915),\n ('nissan', 0.8108116984367371),\n ('automaker', 0.7918163537979126),\n ('mazda', 0.7687169313430786),\n ('bmw', 0.7616022825241089),\n ('ford', 0.7547588348388672),\n ('motors', 0.7539199590682983),\n ('volkswagen', 0.7176680564880371),\n ('prius', 0.7156582474708557),\n ('chrysler', 0.7085398435592651)]\n<\/code><\/pre>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u062a\u0645\u0627\u0645 \u0646\u062a\u0627\u06cc\u062c \u0628\u0633\u06cc\u0627\u0631 \u0645\u0631\u062a\u0628\u0637 \u0628\u0627 \u06a9\u0644\u0645\u0647 &#8220;toyota&#8221; \u0647\u0633\u062a\u0646\u062f.  \u0639\u062f\u062f \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u06a9\u0633\u0631 \u0628\u0627 \u0634\u0627\u062e\u0635 \u0634\u0628\u0627\u0647\u062a \u0645\u0637\u0627\u0628\u0642\u062a \u062f\u0627\u0631\u062f.  
\u0634\u0627\u062e\u0635 \u062a\u0634\u0627\u0628\u0647 \u0628\u0627\u0644\u0627\u062a\u0631 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u06a9\u0644\u0645\u0647 \u0645\u0631\u062a\u0628\u0637 \u062a\u0631 \u0627\u0633\u062a.<\/p>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u06cc\u06a9\u06cc \u0627\u0632 \u0645\u062d\u0628\u0648\u0628 \u062a\u0631\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP \u0627\u0633\u062a.  \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647\u060c \u0628\u0647 \u0637\u0648\u0631 \u062e\u0644\u0627\u0635\u0647 \u0628\u0631\u0631\u0633\u06cc \u06a9\u0631\u062f\u06cc\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u06a9\u0627\u0631\u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0648 \u0627\u06cc\u062c\u0627\u062f \u067e\u06cc\u06a9\u0631\u0647 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.  \u0647\u0645\u0686\u0646\u06cc\u0646 \u0631\u0648\u0634 \u062f\u0627\u0646\u0644\u0648\u062f \u0645\u0627\u0698\u0648\u0644 \u0647\u0627\u06cc \u062f\u0627\u062e\u0644\u06cc Gensim \u0631\u0627 \u062f\u06cc\u062f\u06cc\u0645.  
\u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0628\u0639\u062f\u06cc\u060c \u0631\u0648\u0634 \u0627\u0646\u062c\u0627\u0645 \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0631\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f.<\/p>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=[];t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)[0];\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 \u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-23 02:48:05<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;16288&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 
\u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP: \u06a9\u0627\u0631 \u0628\u0627 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim (\u0642\u0633\u0645\u062a 1)&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div 
class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 13<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u0627\u06cc\u0646 \u062f\u0647\u0645\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0627\u0632 \u0633\u0631\u06cc \u0645\u0642\u0627\u0644\u0627\u062a \u0645\u0646 \u0627\u0633\u062a \u0631\u0648\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP. \u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0642\u0628\u0644\u06cc \u062e\u0648\u062f \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 StanfordCoreNLP \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0648\u0638\u0627\u06cc\u0641 \u0645\u062e\u062a\u0644\u0641 NLP \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0634\u0648\u062f. 
\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0628\u0647 \u0628\u0631\u0631\u0633\u06cc \u0622\u0646 \u0645\u06cc \u067e\u0631\u062f\u0627\u0632\u06cc\u0645 \u062c\u0646\u0633\u06cc\u0645 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u060c \u06a9\u0647 \u06cc\u06a9\u06cc \u062f\u06cc\u06af\u0631 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0628\u0633\u06cc\u0627\u0631 \u0645\u0641\u06cc\u062f NLP \u0628\u0631\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a. Gensim \u062f\u0631 [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":9162,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620,1686],"tags":[],"class_list":["post-16288","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming","category-ai"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16288","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=16288"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16288\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/9162"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=16288"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=16288"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-
json\/wp\/v2\/tags?post=16288"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}