{"id":16267,"date":"2024-01-22T22:06:23","date_gmt":"2024-01-22T18:36:23","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/"},"modified":"2024-01-22T22:06:23","modified_gmt":"2024-01-22T18:36:23","slug":"%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/","title":{"rendered":"\u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP: \u06a9\u0627\u0631 \u0628\u0627 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim (\u0628\u062e\u0634 2)"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%86%d8%b5%d8%a8_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_%d9%87%d8%a7%db%8c_%d9%85%d9%88%d8%b1%d8%af_%d9%86%db%8c%d8%a7%d8%b2\" >\u0646\u0635\u0628 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632<\/a><\/li><li 
class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%85%d8%af%d9%84_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d9%88%d8%b6%d9%88%d8%b9_%d8%a8%d8%a7_lda\" >\u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0628\u0627 LDA<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d8%ae%d8%b1%d8%a7%d8%b4_%d8%af%d8%a7%d8%af%d9%86_%d9%85%d9%82%d8%a7%d9%84%d8%a7%d8%aa_%d9%88%db%8c%da%a9%db%8c_%d9%be%d8%af%db%8c%d8%a7\" >\u062e\u0631\u0627\u0634 \u062f\u0627\u062f\u0646 \u0645\u0642\u0627\u0644\u0627\u062a \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%be%db%8c%d8%b4_%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4_%d8%af%d8%a7%d8%af%d9%87_%d9%87%d8%a7\" >\u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%85%d9%88%d8%b6%d9%88%d8%b9%d8%a7%d8%aa_%d9%85%d8%af%d9%84%d8%b3%d8%a7%d8%b2%db%8c\" >\u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0645\u062f\u0644\u0633\u0627\u0632\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d8%a7%d8%b1%d8%b2%db%8c%d8%a7%d8%a8%db%8c_%d9%85%d8%af%d9%84_lda\" >\u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u0645\u062f\u0644 LDA<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d8%aa%d8%ac%d8%b3%d9%85_lda\" >\u062a\u062c\u0633\u0645 LDA<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%85%d8%af%d9%84_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d9%88%d8%b6%d9%88%d8%b9_%d8%a7%d8%b2_%d8%b7%d8%b1%db%8c%d9%82_lsi\" >\u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 \u0637\u0631\u06cc\u0642 LSI<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-9\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%b1%d8%a7%db%8c-nlp-%da%a9%d8%a7%d8%b1-%d8%a8%d8%a7-%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87-gensim-%d8%a8%d8%ae%d8%b4-2\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 10<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<p>\u0627\u06cc\u0646 \u06cc\u0627\u0632\u062f\u0647\u0645\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0645\u0646 \u0627\u0632 \u0633\u0631\u06cc \u0645\u0642\u0627\u0644\u0627\u062a \u0627\u0633\u062a \u0631\u0648\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP \u0648 \u0645\u0642\u0627\u0644\u0647 \u062f\u0648\u0645 \u0631\u0648\u06cc \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u062f\u0631 \u0627\u06cc\u0646 \u0645\u062c\u0645\u0648\u0639\u0647.  \u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0642\u0628\u0644\u06cc\u060c \u0645\u0639\u0631\u0641\u06cc \u0645\u062e\u062a\u0635\u0631\u06cc \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0631\u0627\u0626\u0647 \u062f\u0627\u062f\u0645.  
\u0645\u0646 \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0641\u0631\u0647\u0646\u06af\u200c\u0647\u0627\u06cc \u0644\u063a\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc \u0645\u062a\u0646\u0627\u0638\u0631\u0634\u0627\u0646 \u0646\u06af\u0627\u0634\u062a \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f.  \u0645\u0627 \u0628\u06cc\u0634\u062a\u0631 \u062f\u0631 \u0645\u0648\u0631\u062f \u0686\u06af\u0648\u0646\u06af\u06cc \u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0628\u062d\u062b \u06a9\u0631\u062f\u06cc\u0645.  \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0628\u0647 \u0628\u0631\u0631\u0633\u06cc \u0631\u0648\u0634 \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0645\u06cc \u067e\u0631\u062f\u0627\u0632\u06cc\u0645.<\/p>\n<p>\u0631\u0648\u0634 \u0627\u0646\u062c\u0627\u0645 \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Scikit-Learn \u067e\u0627\u06cc\u062a\u0648\u0646 \u0631\u0627 \u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0642\u0628\u0644\u06cc \u062e\u0648\u062f \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0647\u200c\u0627\u0645.  
\u062f\u0631 \u0622\u0646 \u0645\u0642\u0627\u0644\u0647 \u0631\u0648\u0634 \u0627\u0646\u062c\u0627\u0645 \u0622\u0646 \u0631\u0627 \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0645 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Latent_Dirichlet_allocation\">\u062a\u062e\u0635\u06cc\u0635 \u062f\u06cc\u0631\u06cc\u06a9\u0644\u0647 \u0646\u0647\u0641\u062a\u0647<\/a> (LDA) \u0648 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Non-negative_matrix_factorization\">\u0641\u0627\u06a9\u062a\u0648\u0631\u0633\u0627\u0632\u06cc \u0645\u0627\u062a\u0631\u06cc\u0633 \u063a\u06cc\u0631 \u0645\u0646\u0641\u06cc<\/a> (NMF) \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f.  
\u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0645\u0648\u0631\u062f \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639\u060c LDA \u0648 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Latent_semantic_analysis\">LSI<\/a> (\u0646\u0645\u0627\u06cc\u0647 \u0633\u0627\u0632\u06cc \u0645\u0639\u0646\u0627\u06cc\u06cc \u0646\u0647\u0641\u062a\u0647).<\/p>\n<h2 id=\"installingrequiredlibraries\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%b5%d8%a8_%da%a9%d8%aa%d8%a7%d8%a8%d8%ae%d8%a7%d9%86%d9%87_%d9%87%d8%a7%db%8c_%d9%85%d9%88%d8%b1%d8%af_%d9%86%db%8c%d8%a7%d8%b2\"><\/span>\u0646\u0635\u0628 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0645\u0627 \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u0627\u062f \u0631\u0648\u06cc \u0645\u062a\u0646 \u0628\u0647 \u062f\u0633\u062a \u0622\u0645\u062f\u0647 \u0627\u0632 \u0645\u0642\u0627\u0644\u0627\u062a \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627.  \u0628\u0631\u0627\u06cc \u062e\u0631\u0627\u0634 \u062f\u0627\u062f\u0646 \u0645\u0642\u0627\u0644\u0627\u062a \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627\u060c \u0627\u0632 API \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u0645.  
\u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 API \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627\u060c \u062f\u0633\u062a\u0648\u0631 \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> pip install wikipedia<\/span>\n<\/code><\/pre>\n<p>\u062f\u0631 \u063a\u06cc\u0631 \u0627\u06cc\u0646 \u0635\u0648\u0631\u062a\u060c \u0627\u06af\u0631 \u0627\u0632 \u062a\u0648\u0632\u06cc\u0639 Anaconda \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u06cc\u06a9\u06cc \u0627\u0632 \u062f\u0633\u062a\u0648\u0631\u0627\u062a \u0632\u06cc\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c conda-forge wikipedia<\/span>\n<span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c conda-forge\/label\/cf201901 wikipedia<\/span>\n<\/code><\/pre>\n<p>\u0628\u0631\u0627\u06cc \u062a\u062c\u0633\u0645 \u0645\u062f\u0644 \u0645\u0648\u0636\u0648\u0639 \u062e\u0648\u062f\u060c \u0627\u0632 \u0622\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f <code>pyLDAvis<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647  \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0645\u0648\u0627\u0631\u062f \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f pip \u062f\u0633\u062a\u0648\u0631:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> pip install pyLDAvis<\/span>\n<\/code><\/pre>\n<p>\u062f\u0648\u0628\u0627\u0631\u0647\u060c \u0627\u06af\u0631 
\u0628\u0647 \u062c\u0627\u06cc \u0622\u0646 \u0627\u0632 \u062a\u0648\u0632\u06cc\u0639 Anaconda \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06cc\u06a9\u06cc \u0627\u0632 \u062f\u0633\u062a\u0648\u0631\u0627\u062a \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c conda-forge pyldavis<\/span>\n<span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c conda-forge\/label\/gcc7 pyldavis<\/span>\n<span class=\"hljs-meta\">$<\/span><span class=\"bash\"> conda install -c conda-forge\/label\/cf201901 pyldavis<\/span>\n<\/code><\/pre>\n<h2 id=\"topicmodelingwithlda\"><span class=\"ez-toc-section\" id=\"%d9%85%d8%af%d9%84_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d9%88%d8%b6%d9%88%d8%b9_%d8%a8%d8%a7_lda\"><\/span>\u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0628\u0627 LDA<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0628\u062e\u0634\u060c \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639\u06cc \u0645\u0642\u0627\u0644\u0627\u062a \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 LDA \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u062f\u0647\u06cc\u0645.<\/p>\n<p>\u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0631\u0627 \u062f\u0627\u0646\u0644\u0648\u062f \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f \u0631\u0648\u06cc \u0645\u0648\u0636\u0648\u0639\u0627\u062a &#8220;\u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc&#8221;\u060c &#8220;\u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc&#8221;\u060c &#8220;\u0628\u0631\u062c \u0627\u06cc\u0641\u0644&#8221; \u0648 
&#8220;\u0645\u0648\u0646\u0627\u0644\u06cc\u0632\u0627&#8221;.  \u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0645\u0642\u0627\u0644\u0627\u062a \u0631\u0627 \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0633\u067e\u0633 \u0645\u0631\u062d\u0644\u0647 \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u062f\u0646\u0628\u0627\u0644 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0645\u062f\u0644 LDA \u0631\u0627 \u062a\u062c\u0633\u0645 \u06a9\u0646\u06cc\u0645.<\/p>\n<h3 id=\"scrapingwikipediaarticles\"><span class=\"ez-toc-section\" id=\"%d8%ae%d8%b1%d8%a7%d8%b4_%d8%af%d8%a7%d8%af%d9%86_%d9%85%d9%82%d8%a7%d9%84%d8%a7%d8%aa_%d9%88%db%8c%da%a9%db%8c_%d9%be%d8%af%db%8c%d8%a7\"><\/span>\u062e\u0631\u0627\u0634 \u062f\u0627\u062f\u0646 \u0645\u0642\u0627\u0644\u0627\u062a \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> wikipedia\n<span class=\"hljs-keyword\">import<\/span> nltk\n\nnltk.download(<span class=\"hljs-string\">'stopwords'<\/span>)\nen_stop = <span class=\"hljs-built_in\">set<\/span>(nltk.corpus.stopwords.words(<span class=\"hljs-string\">'english'<\/span>))\n\nglobal_warming = wikipedia.page(<span class=\"hljs-string\">\"Global Warming\"<\/span>)\nartificial_intelligence = wikipedia.page(<span class=\"hljs-string\">\"Artificial Intelligence\"<\/span>)\nmona_lisa = wikipedia.page(<span class=\"hljs-string\">\"Mona Lisa\"<\/span>)\neiffel_tower = wikipedia.page(<span class=\"hljs-string\">\"Eiffel 
Tower\"<\/span>)\n\ncorpus = (global_warming.content, artificial_intelligence.content, mona_lisa.content, eiffel_tower.content)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u0627 \u0627\u0628\u062a\u062f\u0627 import \u0631\u0627 <code>wikipedia<\/code> \u0648 <code>nltk<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627  \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u0631\u0627 \u0647\u0645 \u062f\u0627\u0646\u0644\u0648\u062f \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>nltk<\/code> \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641  \u0628\u0639\u062f\u0627\u064b \u0627\u0632 \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f.<\/p>\n<p>\u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0645\u0642\u0627\u0644\u0647 \u0631\u0627 \u0627\u0632 \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627 \u0628\u0627 \u062a\u0639\u06cc\u06cc\u0646 \u0645\u0648\u0636\u0648\u0639 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0631\u062f\u06cc\u0645 <code>page<\/code> \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 <code>wikipedia<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647  \u0634\u06cc\u0621 \u0628\u0631\u06af\u0634\u062a\u06cc \u062d\u0627\u0648\u06cc \u0627\u0637\u0644\u0627\u0639\u0627\u062a\u06cc \u062f\u0631 \u0645\u0648\u0631\u062f \u062f\u0627\u0646\u0644\u0648\u062f \u0634\u062f\u0647 \u0627\u0633\u062a page.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0628\u0627\u0632\u06cc\u0627\u0628\u06cc \u0645\u062d\u062a\u0648\u06cc\u0627\u062a \u0635\u0641\u062d\u0647 \u0648\u0628\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 <code>content<\/code> \u0635\u0641\u062a.  
\u0645\u062d\u062a\u0648\u0627\u06cc \u0647\u0631 \u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u062f\u0631 \u0644\u06cc\u0633\u062a \u0646\u0627\u0645\u06af\u0630\u0627\u0631\u06cc \u0634\u062f\u0647 \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc \u0634\u0648\u062f <code>corpus<\/code>.<\/p>\n<h3 id=\"datapreprocessing\"><span class=\"ez-toc-section\" id=\"%d9%be%db%8c%d8%b4_%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4_%d8%af%d8%a7%d8%af%d9%87_%d9%87%d8%a7\"><\/span>\u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 \u0637\u0631\u06cc\u0642 LDA\u060c \u0628\u0647 \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u062f\u0627\u062f\u0647 \u0648 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u0646\u06cc\u0627\u0632 \u062f\u0627\u0631\u06cc\u0645.  
\u0627\u0632 \u0622\u062e\u0631\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 (\u067e\u06cc\u0648\u0646\u062f \u0628\u0627\u0644\u0627)\u060c \u0645\u06cc \u062f\u0627\u0646\u06cc\u0645 \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0648 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u0628\u0647 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0646\u0634\u0627\u0646\u0647 \u0646\u06cc\u0627\u0632 \u062f\u0627\u0631\u06cc\u0645.<\/p>\n<p>\u0639\u0644\u0627\u0648\u0647 \u0628\u0631 \u0627\u06cc\u0646\u060c \u0645\u0627 \u0628\u0627\u06cc\u062f \u0645\u0648\u0627\u0631\u062f\u06cc \u0645\u0627\u0646\u0646\u062f \u0639\u0644\u0627\u0626\u0645 \u0646\u0642\u0637\u0647 \u06af\u0630\u0627\u0631\u06cc \u0631\u0627 \u062d\u0630\u0641 \u06a9\u0646\u06cc\u0645 \u0648 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062e\u0648\u062f \u0645\u062a\u0648\u0642\u0641 \u06a9\u0646\u06cc\u0645.  \u0628\u0631\u0627\u06cc \u06cc\u06a9\u0646\u0648\u0627\u062e\u062a\u06cc\u060c \u062a\u0645\u0627\u0645 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u0631\u0627 \u0628\u0647 \u062d\u0631\u0648\u0641 \u06a9\u0648\u0686\u06a9 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0647\u0645\u0686\u0646\u06cc\u0646 \u0622\u0646\u0647\u0627 \u0631\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u0644\u0645\u0627\u062a\u06cc\u0632\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  
\u0647\u0645\u0686\u0646\u06cc\u0646\u060c \u062a\u0645\u0627\u0645 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc\u06cc \u06a9\u0647 \u06a9\u0645\u062a\u0631 \u0627\u0632 5 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631 \u062f\u0627\u0631\u0646\u062f \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> re\n<span class=\"hljs-keyword\">from<\/span> nltk.stem <span class=\"hljs-keyword\">import<\/span> WordNetLemmatizer\n\nstemmer = WordNetLemmatizer()\n\n<span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">preprocess_text<\/span>(<span class=\"hljs-params\">document<\/span>):<\/span>\n        \n        document = re.sub(<span class=\"hljs-string\">r'\\W'<\/span>, <span class=\"hljs-string\">' '<\/span>, <span class=\"hljs-built_in\">str<\/span>(document))\n\n        \n        document = re.sub(<span class=\"hljs-string\">r'\\s+[a-zA-Z]\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document)\n\n        \n        document = re.sub(<span class=\"hljs-string\">r'\\^[a-zA-Z]\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document)\n\n        \n        document = re.sub(<span class=\"hljs-string\">r'\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document, flags=re.I)\n\n        \n        document = re.sub(<span class=\"hljs-string\">r'^b\\s+'<\/span>, <span class=\"hljs-string\">''<\/span>, document)\n\n        \n        document = document.lower()\n\n        \n        tokens = document.split()\n        tokens = [stemmer.lemmatize(word) <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> tokens]\n        tokens = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> tokens <span 
class=\"hljs-keyword\">if<\/span> word <span class=\"hljs-keyword\">not<\/span> <span class=\"hljs-keyword\">in<\/span> en_stop]\n        tokens = [word <span class=\"hljs-keyword\">for<\/span> word <span class=\"hljs-keyword\">in<\/span> tokens <span class=\"hljs-keyword\">if<\/span> <span class=\"hljs-built_in\">len<\/span>(word)  &gt; <span class=\"hljs-number\">5<\/span>]\n\n        <span class=\"hljs-keyword\">return<\/span> tokens\n<\/code><\/pre>\n<p>\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0645\u062a\u062f\u06cc \u0628\u0647 \u0646\u0627\u0645 \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>preprocess_text<\/code> \u06a9\u0647 \u06cc\u06a9 \u0633\u0646\u062f \u0645\u062a\u0646\u06cc \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u06cc \u067e\u0630\u06cc\u0631\u062f.  \u0627\u06cc\u0646 \u0631\u0648\u0634 \u0627\u0632 \u0639\u0645\u0644\u06cc\u0627\u062a regex \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u06a9\u0627\u0631\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u062f.  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0628\u0647 \u0637\u0648\u0631 \u062e\u0644\u0627\u0635\u0647 \u0622\u0646\u0686\u0647 \u0631\u0627 \u06a9\u0647 \u062f\u0631 \u062a\u0627\u0628\u0639 \u0628\u0627\u0644\u0627 \u0627\u062a\u0641\u0627\u0642 \u0645\u06cc \u0627\u0641\u062a\u062f \u0645\u0631\u0648\u0631 \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">document = re.sub(<span class=\"hljs-string\">r'\\W'<\/span>, <span class=\"hljs-string\">' '<\/span>, <span class=\"hljs-built_in\">str<\/span>(document))\n<\/code><\/pre>\n<p>\u062e\u0637 \u0628\u0627\u0644\u0627 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u062a\u0645\u0627\u0645 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631\u0647\u0627 \u0648 \u0627\u0639\u062f\u0627\u062f \u062e\u0627\u0635 \u0628\u0627 \u06cc\u06a9 \u0641\u0627\u0635\u0644\u0647 \u0645\u06cc \u0634\u0648\u062f.  \u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u0648\u0642\u062a\u06cc \u0639\u0644\u0627\u0626\u0645 \u0646\u06af\u0627\u0631\u0634\u06cc \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u062f\u060c \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631\u0647\u0627\u06cc \u0645\u0646\u0641\u0631\u062f \u0628\u062f\u0648\u0646 \u0645\u0639\u0646\u06cc \u062f\u0631 \u0645\u062a\u0646 \u0638\u0627\u0647\u0631 \u0645\u06cc \u0634\u0648\u0646\u062f.  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u0632\u0645\u0627\u0646\u06cc \u06a9\u0647 \u0634\u0645\u0627 \u0639\u0644\u0627\u0626\u0645 \u0646\u06af\u0627\u0631\u0634\u06cc \u0631\u0627 \u062f\u0631 \u0645\u062a\u0646 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u0645\u06cc \u06a9\u0646\u06cc\u062f <code>Eiffel's<\/code>\u060c \u06a9\u0644\u0645\u0627\u062a <code>Eiffel<\/code> \u0648 <code>s<\/code> \u0628\u0647 \u0646\u0638\u0631 \u0645\u06cc \u0631\u0633\u062f.  
\u0627\u06cc\u0646\u062c\u0627 <code>s<\/code> \u0645\u0639\u0646\u06cc \u0646\u062f\u0627\u0631\u062f\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0628\u0627\u06cc\u062f \u0622\u0646 \u0631\u0627 \u0628\u0627 \u0641\u0636\u0627 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u06a9\u0646\u06cc\u0645.  \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<pre><code class=\"hljs\">document = re.sub(<span class=\"hljs-string\">r'\\s+[a-zA-Z]\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document)\n<\/code><\/pre>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0641\u0648\u0642 \u0641\u0642\u0637 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631\u0647\u0627\u06cc \u062a\u06a9\u06cc \u0631\u0627 \u062f\u0631 \u0645\u062a\u0646 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u062f.  \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u06cc\u06a9 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631 \u062f\u0631 \u0627\u0628\u062a\u062f\u0627\u06cc \u0645\u062a\u0646 \u0627\u0632 \u06a9\u062f \u0632\u06cc\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<pre><code class=\"hljs\">document = re.sub(<span class=\"hljs-string\">r'\\^[a-zA-Z]\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document)\n<\/code><\/pre>\n<p>\u0647\u0646\u06af\u0627\u0645\u06cc \u06a9\u0647 \u062a\u06a9 \u0641\u0627\u0635\u0644\u0647 \u0647\u0627\u06cc \u0645\u062a\u0646 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u062f\u060c \u0686\u0646\u062f\u06cc\u0646 \u0641\u0627\u0635\u0644\u0647 \u062e\u0627\u0644\u06cc \u0638\u0627\u0647\u0631 \u0645\u06cc \u0634\u0648\u0646\u062f.  
\u06a9\u062f \u0632\u06cc\u0631 \u0686\u0646\u062f\u06cc\u0646 \u0641\u0636\u0627\u06cc \u062e\u0627\u0644\u06cc \u0631\u0627 \u0628\u0627 \u06cc\u06a9 \u0641\u0627\u0635\u0644\u0647 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u0645\u06cc \u06a9\u0646\u062f:<\/p>\n<pre><code class=\"hljs\">document = re.sub(<span class=\"hljs-string\">r'\\s+'<\/span>, <span class=\"hljs-string\">' '<\/span>, document, flags=re.I)\n<\/code><\/pre>\n<p>\u0648\u0642\u062a\u06cc \u06cc\u06a9 \u0633\u0646\u062f \u0631\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u0622\u0646\u0644\u0627\u06cc\u0646 \u062e\u0631\u0627\u0634 \u0645\u06cc \u062f\u0647\u06cc\u062f\u060c \u06cc\u06a9 \u0631\u0634\u062a\u0647 <code>b<\/code> \u0627\u063a\u0644\u0628 \u0628\u0647 \u0633\u0646\u062f \u0636\u0645\u06cc\u0645\u0647 \u0645\u06cc \u0634\u0648\u062f \u06a9\u0647 \u0628\u0647 \u0645\u0639\u0646\u0627\u06cc \u0628\u0627\u06cc\u0646\u0631\u06cc \u0628\u0648\u062f\u0646 \u0633\u0646\u062f \u0627\u0633\u062a.  \u0628\u0631\u0627\u06cc \u062d\u0630\u0641 \u067e\u06cc\u0634\u0648\u0646\u062f <code>b<\/code>\u060c \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f:<\/p>\n<pre><code class=\"hljs\">document = re.sub(<span class=\"hljs-string\">r'^b\\s+'<\/span>, <span class=\"hljs-string\">''<\/span>, document)\n<\/code><\/pre>\n<p>\u0628\u0642\u06cc\u0647 \u0631\u0648\u0634 \u062e\u0648\u062f \u062a\u0648\u0636\u06cc\u062d\u06cc \u0627\u0633\u062a.  \u0633\u0646\u062f \u0628\u0647 \u062d\u0631\u0648\u0641 \u06a9\u0648\u0686\u06a9 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u0634\u0648\u062f \u0648 \u0633\u067e\u0633 \u0628\u0647 \u062a\u0648\u06a9\u0646 \u062a\u0642\u0633\u06cc\u0645 \u0645\u06cc \u0634\u0648\u062f.  
\u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u0644\u0645\u0627\u062a\u06cc\u0632\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f \u0648 \u06a9\u0644\u0645\u0627\u062a \u062a\u0648\u0642\u0641 \u062d\u0630\u0641 \u0645\u06cc \u0634\u0648\u0646\u062f.  \u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u062a\u0645\u0627\u0645 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc\u06cc \u06a9\u0647 \u06a9\u0645\u062a\u0631 \u0627\u0632 \u067e\u0646\u062c \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631 \u062f\u0627\u0631\u0646\u062f \u0646\u0627\u062f\u06cc\u062f\u0647 \u06af\u0631\u0641\u062a\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f.  \u0628\u0642\u06cc\u0647 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627 \u0628\u0647 \u062a\u0627\u0628\u0639 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc \u0628\u0627\u0632\u06af\u0631\u062f\u0627\u0646\u062f\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f.<\/p>\n<h3 id=\"modelingtopics\"><span class=\"ez-toc-section\" id=\"%d9%85%d9%88%d8%b6%d9%88%d8%b9%d8%a7%d8%aa_%d9%85%d8%af%d9%84%d8%b3%d8%a7%d8%b2%db%8c\"><\/span>\u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0645\u062f\u0644\u0633\u0627\u0632\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0627\u06cc\u0646 \u0628\u062e\u0634 \u06af\u0648\u0634\u062a \u0645\u0642\u0627\u0644\u0647 \u0627\u0633\u062a.  \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0627\u0632 \u062a\u0627\u0628\u0639 \u062f\u0627\u062e\u0644\u06cc \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.  
\u0627\u0645\u0627 \u0642\u0628\u0644 \u0627\u0632 \u0622\u0646\u060c \u0645\u0627 \u0628\u0627\u06cc\u062f \u0645\u062c\u0645\u0648\u0639\u0647\u200c\u0627\u06cc \u0627\u0632 \u062a\u0645\u0627\u0645 \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627 (\u06a9\u0644\u0645\u0627\u062a) \u062f\u0631 \u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627 \u0631\u0627 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645.  \u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">processed_data = [];\n<span class=\"hljs-keyword\">for<\/span> doc <span class=\"hljs-keyword\">in<\/span> corpus:\n    tokens = preprocess_text(doc)\n    processed_data.append(tokens)\n<\/code><\/pre>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627 \u0645\u0633\u062a\u0642\u06cc\u0645 \u0627\u0633\u062a.  \u0645\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0622\u0646 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>corpus<\/code> \u0641\u0647\u0631\u0633\u062a\u06cc \u06a9\u0647 \u0634\u0627\u0645\u0644 \u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u0631\u0634\u062a\u0647 \u0627\u06cc \u0627\u0633\u062a.  \u062f\u0631 \u0647\u0631 \u062a\u06a9\u0631\u0627\u0631\u060c \u0633\u0646\u062f \u0631\u0627 \u0628\u0647 <code>preprocess_text<\/code> \u0631\u0648\u0634\u06cc \u06a9\u0647 \u0642\u0628\u0644\u0627 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645 \u0627\u0631\u0633\u0627\u0644 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u0627\u06cc\u0646 \u0631\u0648\u0634 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u0622\u0646 \u0633\u0646\u062f \u062e\u0627\u0635 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.  
\u062a\u0648\u06a9\u0646 \u0647\u0627 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a <code>processed_data<\/code> \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u067e\u0627\u06cc\u0627\u0646 \u062d\u0644\u0642\u0647 <code>for<\/code>\u060c \u0647\u0645\u0647 \u0646\u0634\u0627\u0646\u0647 \u0647\u0627\u06cc \u0647\u0631 \u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u062f\u0631 \u0641\u0647\u0631\u0633\u062a <code>processed_data<\/code> \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f.  \u0627\u06a9\u0646\u0648\u0646 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0627\u06cc\u0646 \u0641\u0647\u0631\u0633\u062a \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0648 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u0644\u0645\u0627\u062a \u0645\u062a\u0646\u0627\u0638\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645.  \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim <span class=\"hljs-keyword\">import<\/span> corpora\n\ngensim_dictionary = corpora.Dictionary(processed_data)\ngensim_corpus = [gensim_dictionary.doc2bow(token, allow_update=<span class=\"hljs-literal\">True<\/span>) <span class=\"hljs-keyword\">for<\/span> token <span class=\"hljs-keyword\">in<\/span> processed_data]\n<\/code><\/pre>\n<p>\u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u062e\u0648\u062f \u0648 \u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u062c\u0645\u0648\u0639\u0647 \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 pickle \u0630\u062e\u06cc\u0631\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  
\u0628\u0639\u062f\u0627\u064b \u0627\u0632 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0630\u062e\u06cc\u0631\u0647 \u0634\u062f\u0647 \u0628\u0631\u0627\u06cc \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f \u0631\u0648\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062c\u062f\u06cc\u062f<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> pickle\n\npickle.dump(gensim_corpus, <span class=\"hljs-built_in\">open<\/span>(<span class=\"hljs-string\">'gensim_corpus_corpus.pkl'<\/span>, <span class=\"hljs-string\">'wb'<\/span>))\ngensim_dictionary.save(<span class=\"hljs-string\">'gensim_dictionary.gensim'<\/span>)\n<\/code><\/pre>\n<p>\u0627\u06a9\u0646\u0648\u0646\u060c \u0645\u0627 \u0647\u0645\u0647 \u0686\u06cc\u0632 \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0645\u062f\u0644 LDA \u062f\u0631 Gensim \u0631\u0627 \u062f\u0627\u0631\u06cc\u0645.  \u0645\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f <code>LdaModel<\/code> \u06a9\u0644\u0627\u0633 \u0627\u0632 <code>gensim.models.ldamodel<\/code> \u0645\u0627\u0698\u0648\u0644 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0645\u062f\u0644 LDA.  
\u0645\u0627 \u0628\u0627\u06cc\u062f \u067e\u06cc\u06a9\u0631\u0647 \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u06a9\u0647 \u0642\u0628\u0644\u0627\u064b \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0627\u0648\u0644 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645 \u0628\u0647 \u0622\u0646 \u067e\u0627\u0633 \u062f\u0647\u06cc\u0645 <code>LdaModel<\/code> \u0633\u0627\u0632\u0646\u062f\u0647\u060c \u0628\u0647 \u062f\u0646\u0628\u0627\u0644 \u0622\u0646 \u062a\u0639\u062f\u0627\u062f \u0645\u0648\u0636\u0648\u0639\u0627\u062a\u060c \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u06a9\u0647 \u0642\u0628\u0644\u0627 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645\u060c \u0648 \u062a\u0639\u062f\u0627\u062f \u067e\u0627\u0633\u200c\u0647\u0627 (\u062a\u0639\u062f\u0627\u062f \u062a\u06a9\u0631\u0627\u0631 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644).<\/p>\n<p>\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> gensim\n\nlda_model = gensim.models.ldamodel.LdaModel(gensim_corpus, num_topics=<span class=\"hljs-number\">4<\/span>, id2word=gensim_dictionary, passes=<span class=\"hljs-number\">20<\/span>)\nlda_model.save(<span class=\"hljs-string\">'gensim_model.gensim'<\/span>)\n<\/code><\/pre>\n<p>\u0628\u0644\u0647 \u0622\u0646 \u0628\u0647 \u0622\u0646 \u0633\u0627\u062f\u06af\u06cc \u0627\u0633\u062a.  
\u062f\u0631 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u0645\u062f\u0644 LDA \u0631\u0627 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062e\u0648\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645 \u0648 \u0622\u0646 \u0631\u0627 \u0630\u062e\u06cc\u0631\u0647 \u06a9\u0631\u062f\u06cc\u0645.<\/p>\n<p>\u0628\u0639\u062f\u060c \u0627\u062c\u0627\u0632\u0647 \u062f\u0647\u06cc\u062f print 10 \u06a9\u0644\u0645\u0647 \u0628\u0631\u0627\u06cc \u0647\u0631 \u0645\u0648\u0636\u0648\u0639.  \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 <code>print_topics<\/code> \u0631\u0648\u0634.  \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">topics = lda_model.print_topics(num_words=<span class=\"hljs-number\">10<\/span>)\n<span class=\"hljs-keyword\">for<\/span> topic <span class=\"hljs-keyword\">in<\/span> topics:\n    <span class=\"hljs-built_in\">print<\/span>(topic)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">(0, '0.036*\"painting\" + 0.018*\"leonardo\" + 0.009*\"louvre\" + 0.009*\"portrait\" + 0.006*\"museum\" + 0.006*\"century\" + 0.006*\"french\" + 0.005*\"giocondo\" + 0.005*\"original\" + 0.004*\"picture\"')\n\n(1, '0.016*\"intelligence\" + 0.014*\"machine\" + 0.012*\"artificial\" + 0.011*\"problem\" + 0.010*\"learning\" + 0.009*\"system\" + 0.008*\"network\" + 0.007*\"research\" + 0.007*\"knowledge\" + 0.007*\"computer\"')\n\n(2, '0.026*\"eiffel\" + 0.008*\"second\" + 0.006*\"french\" + 0.006*\"structure\" + 0.006*\"exposition\" + 0.005*\"tallest\" + 0.005*\"engineer\" + 0.004*\"design\" + 0.004*\"france\" + 0.004*\"restaurant\"')\n\n(3, '0.031*\"climate\" + 0.026*\"change\" + 0.024*\"warming\" + 
0.022*\"global\" + 0.014*\"emission\" + 0.013*\"effect\" + 0.012*\"greenhouse\" + 0.011*\"temperature\" + 0.007*\"carbon\" + 0.006*\"increase\"')\n<\/code><\/pre>\n<p>\u0645\u0628\u062d\u062b \u0627\u0648\u0644 \u0634\u0627\u0645\u0644 \u06a9\u0644\u0645\u0627\u062a\u06cc \u0645\u0627\u0646\u0646\u062f <code>painting<\/code>\u060c <code>louvre<\/code>\u060c <code>portrait<\/code>\u060c <code>french<\/code> <code>museum<\/code>\u0648 \u063a\u06cc\u0631\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0641\u0631\u0636 \u06a9\u0631\u062f \u06a9\u0647 \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0645\u062a\u0639\u0644\u0642 \u0628\u0647 \u0645\u0648\u0636\u0648\u0639\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u06cc\u06a9 \u0639\u06a9\u0633 \u0628\u0627 \u0627\u0631\u062a\u0628\u0627\u0637 \u0641\u0631\u0627\u0646\u0633\u0648\u06cc \u0627\u0633\u062a.<\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0645\u0634\u0627\u0628\u0647\u060c \u062f\u0648\u0645 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a\u06cc \u0645\u0627\u0646\u0646\u062f <code>intelligence<\/code>\u060c <code>machine<\/code>\u060c <code>research<\/code>\u0648 \u063a\u06cc\u0631\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0641\u0631\u0636 \u06a9\u0631\u062f \u06a9\u0647 \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0645\u062a\u0639\u0644\u0642 \u0628\u0647 \u0645\u0628\u062d\u062b \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc \u0627\u0633\u062a.<\/p>\n<p>\u0628\u0647 \u0647\u0645\u06cc\u0646 \u062a\u0631\u062a\u06cc\u0628\u060c \u06a9\u0644\u0645\u0627\u062a \u0627\u0632 \u0645\u0628\u062d\u062b \u0633\u0648\u0645 \u0648 \u0686\u0647\u0627\u0631\u0645 \u0628\u0647 \u0627\u06cc\u0646 \u0648\u0627\u0642\u0639\u06cc\u062a \u0627\u0634\u0627\u0631\u0647 \u062f\u0627\u0631\u062f \u06a9\u0647 \u0627\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0628\u0647 \u062a\u0631\u062a\u06cc\u0628 
\u0628\u062e\u0634\u06cc \u0627\u0632 \u0645\u0628\u062d\u062b \u0628\u0631\u062c \u0627\u06cc\u0641\u0644 \u0648 \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc \u0647\u0633\u062a\u0646\u062f.<\/p>\n<p>\u0645\u0627 \u0628\u0647 \u0648\u0636\u0648\u062d \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0628\u06cc\u0646\u06cc\u0645 \u06a9\u0647 \u0645\u062f\u0644 LDA \u0628\u0627 \u0645\u0648\u0641\u0642\u06cc\u062a \u0686\u0647\u0627\u0631 \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0645\u0627 \u0634\u0646\u0627\u0633\u0627\u06cc\u06cc \u06a9\u0631\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0630\u06a9\u0631 \u0627\u06cc\u0646 \u0646\u06a9\u062a\u0647 \u0636\u0631\u0648\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 LDA \u06cc\u06a9 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u062f\u0648\u0646 \u0646\u0638\u0627\u0631\u062a \u0627\u0633\u062a \u0648 \u062f\u0631 \u0645\u0633\u0627\u0626\u0644 \u062f\u0646\u06cc\u0627\u06cc \u0648\u0627\u0642\u0639\u06cc\u060c \u0627\u0632 \u0642\u0628\u0644 \u0627\u0632 \u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0627\u0637\u0644\u0627\u0639\u06cc \u0646\u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u0627\u0634\u062a.  
\u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u0628\u0647 \u0634\u0645\u0627 \u062f\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f\u060c \u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 LDA \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u0634\u0648\u0646\u062f \u0648 \u0633\u067e\u0633 \u0646\u0627\u0645 \u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0628\u0647 \u0639\u0647\u062f\u0647 \u0634\u0645\u0627\u0633\u062a.<\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0627\u06a9\u0646\u0648\u0646 8 \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062e\u0648\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645.  \u0645\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f print 5 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u0647\u0631 \u0645\u0648\u0636\u0648\u0639:<\/p>\n<pre><code class=\"hljs\">lda_model = gensim.models.ldamodel.LdaModel(gensim_corpus, num_topics=<span class=\"hljs-number\">8<\/span>, id2word=gensim_dictionary, passes=<span class=\"hljs-number\">15<\/span>)\nlda_model.save(<span class=\"hljs-string\">'gensim_model.gensim'<\/span>)\ntopics = lda_model.print_topics(num_words=<span class=\"hljs-number\">5<\/span>)\n<span class=\"hljs-keyword\">for<\/span> topic <span class=\"hljs-keyword\">in<\/span> topics:\n    <span class=\"hljs-built_in\">print<\/span>(topic)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">(0, '0.000*\"climate\" + 0.000*\"change\" + 0.000*\"eiffel\" + 0.000*\"warming\" + 0.000*\"global\"')\n(1, '0.018*\"intelligence\" + 0.016*\"machine\" + 0.013*\"artificial\" + 0.012*\"problem\" + 0.010*\"learning\"')\n(2, '0.045*\"painting\" + 0.023*\"leonardo\" + 0.012*\"louvre\" + 
0.011*\"portrait\" + 0.008*\"museum\"')\n(3, '0.000*\"intelligence\" + 0.000*\"machine\" + 0.000*\"problem\" + 0.000*\"artificial\" + 0.000*\"system\"')\n(4, '0.035*\"climate\" + 0.030*\"change\" + 0.027*\"warming\" + 0.026*\"global\" + 0.015*\"emission\"')\n(5, '0.031*\"eiffel\" + 0.009*\"second\" + 0.007*\"french\" + 0.007*\"structure\" + 0.007*\"exposition\"')\n(6, '0.000*\"painting\" + 0.000*\"machine\" + 0.000*\"system\" + 0.000*\"intelligence\" + 0.000*\"problem\"')\n(7, '0.000*\"climate\" + 0.000*\"change\" + 0.000*\"global\" + 0.000*\"machine\" + 0.000*\"intelligence\"')\n<\/code><\/pre>\n<p>\u0628\u0627\u0632 \u0647\u0645\u060c \u062a\u0639\u062f\u0627\u062f \u0645\u0648\u0636\u0648\u0639\u0627\u062a\u06cc \u06a9\u0647 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0628\u0647 \u0634\u0645\u0627 \u0628\u0633\u062a\u06af\u06cc \u062f\u0627\u0631\u062f.  \u062a\u0627 \u0632\u0645\u0627\u0646\u06cc \u06a9\u0647 \u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0645\u0646\u0627\u0633\u0628 \u0631\u0627 \u067e\u06cc\u062f\u0627 \u06a9\u0646\u06cc\u062f\u060c \u0627\u0639\u062f\u0627\u062f \u0645\u062e\u062a\u0644\u0641 \u0631\u0627 \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u062f.  \u0628\u0631\u0627\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0645\u0627\u060c \u062a\u0639\u062f\u0627\u062f \u0645\u0646\u0627\u0633\u0628 \u0645\u0648\u0636\u0648\u0639\u0627\u062a 4 \u0627\u0633\u062a\u060c \u0632\u06cc\u0631\u0627 \u0627\u0632 \u0642\u0628\u0644 \u0645\u06cc \u062f\u0627\u0646\u06cc\u0645 \u06a9\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u0627 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a\u06cc \u0627\u0632 \u0686\u0647\u0627\u0631 \u0645\u0642\u0627\u0644\u0647 \u0645\u062e\u062a\u0644\u0641 \u0627\u0633\u062a.  
\u0628\u0627 \u0627\u062c\u0631\u0627\u06cc \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0628\u0647 \u0686\u0647\u0627\u0631 \u0645\u0628\u062d\u062b \u0628\u0631\u06af\u0631\u062f\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">lda_model = gensim.models.ldamodel.LdaModel(gensim_corpus, num_topics=<span class=\"hljs-number\">4<\/span>, id2word=gensim_dictionary, passes=<span class=\"hljs-number\">20<\/span>)\nlda_model.save(<span class=\"hljs-string\">'gensim_model.gensim'<\/span>)\ntopics = lda_model.print_topics(num_words=<span class=\"hljs-number\">10<\/span>)\n<span class=\"hljs-keyword\">for<\/span> topic <span class=\"hljs-keyword\">in<\/span> topics:\n    <span class=\"hljs-built_in\">print<\/span>(topic)\n<\/code><\/pre>\n<p>\u0627\u06cc\u0646 \u0628\u0627\u0631\u060c \u0646\u062a\u0627\u06cc\u062c \u0645\u062a\u0641\u0627\u0648\u062a\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u06cc\u062f \u0632\u06cc\u0631\u0627 \u0645\u0642\u0627\u062f\u06cc\u0631 \u0627\u0648\u0644\u06cc\u0647 \u0628\u0631\u0627\u06cc \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc LDA \u0628\u0647 \u0635\u0648\u0631\u062a \u062a\u0635\u0627\u062f\u0641\u06cc \u0627\u0646\u062a\u062e\u0627\u0628 \u0634\u062f\u0647 \u0627\u0646\u062f.  
\u0646\u062a\u0627\u06cc\u062c \u0627\u06cc\u0646 \u0628\u0627\u0631 \u0628\u0647 \u0634\u0631\u062d \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">(0, '0.031*\"climate\" + 0.027*\"change\" + 0.024*\"warming\" + 0.023*\"global\" + 0.014*\"emission\" + 0.013*\"effect\" + 0.012*\"greenhouse\" + 0.011*\"temperature\" + 0.007*\"carbon\" + 0.006*\"increase\"')\n\n(1, '0.026*\"eiffel\" + 0.008*\"second\" + 0.006*\"french\" + 0.006*\"structure\" + 0.006*\"exposition\" + 0.005*\"tallest\" + 0.005*\"engineer\" + 0.004*\"design\" + 0.004*\"france\" + 0.004*\"restaurant\"')\n\n(2, '0.037*\"painting\" + 0.019*\"leonardo\" + 0.009*\"louvre\" + 0.009*\"portrait\" + 0.006*\"museum\" + 0.006*\"century\" + 0.006*\"french\" + 0.005*\"giocondo\" + 0.005*\"original\" + 0.004*\"subject\"')\n\n(3, '0.016*\"intelligence\" + 0.014*\"machine\" + 0.012*\"artificial\" + 0.011*\"problem\" + 0.010*\"learning\" + 0.009*\"system\" + 0.008*\"network\" + 0.007*\"knowledge\" + 0.007*\"research\" + 0.007*\"computer\"')\n<\/code><\/pre>\n<p>\u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0645\u0628\u062d\u062b \u0627\u0648\u0644 \u062f\u0631 \u062d\u0627\u0644 \u062d\u0627\u0636\u0631 \u0628\u06cc\u0634\u062a\u0631 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc \u0627\u0633\u062a\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0645\u0648\u0636\u0648\u0639 \u062f\u0648\u0645 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0628\u0631\u062c \u0627\u06cc\u0641\u0644 \u0627\u0633\u062a.<\/p>\n<h3 id=\"evaluatingtheldamodel\"><span class=\"ez-toc-section\" id=\"%d8%a7%d8%b1%d8%b2%db%8c%d8%a7%d8%a8%db%8c_%d9%85%d8%af%d9%84_lda\"><\/span>\u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u0645\u062f\u0644 LDA<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u0642\u0628\u0644\u0627\u064b \u06af\u0641\u062a\u0645\u060c \u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u062f\u0648\u0646 \u0646\u0638\u0627\u0631\u062a \u0633\u062e\u062a \u0627\u0633\u062a\u060c \u0632\u06cc\u0631\u0627 \u0647\u06cc\u0686 \u062d\u0642\u06cc\u0642\u062a \u0645\u0634\u062e\u0635\u06cc \u0648\u062c\u0648\u062f \u0646\u062f\u0627\u0631\u062f \u06a9\u0647 \u0628\u062a\u0648\u0627\u0646\u06cc\u0645 \u062e\u0631\u0648\u062c\u06cc \u0645\u062f\u0644 \u062e\u0648\u062f \u0631\u0627 \u062f\u0631 \u0628\u0631\u0627\u0628\u0631 \u0622\u0646 \u0622\u0632\u0645\u0627\u06cc\u0634 \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u0641\u0631\u0636 \u06a9\u0646\u06cc\u062f \u06cc\u06a9 \u0633\u0646\u062f \u0645\u062a\u0646\u06cc \u062c\u062f\u06cc\u062f \u062f\u0627\u0631\u06cc\u0645 \u0648 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u0645 \u0645\u0648\u0636\u0648\u0639 \u0622\u0646 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0645\u062f\u0644 LDA \u06a9\u0647 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645 \u067e\u06cc\u062f\u0627 \u06a9\u0646\u06cc\u0645\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">test_doc = <span class=\"hljs-string\">'Great structures are built to remember an event that happened in history.'<\/span>\ntest_doc = preprocess_text(test_doc)\nbow_test_doc = gensim_dictionary.doc2bow(test_doc)\n\n<span class=\"hljs-built_in\">print<\/span>(lda_model.get_document_topics(bow_test_doc))\n<\/code><\/pre>\n<p>\u062f\u0631 
\u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0628\u0627\u0644\u0627\u060c \u06cc\u06a9 \u0631\u0634\u062a\u0647 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645\u060c \u0646\u0645\u0627\u06cc\u0634 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0622\u0646 \u0631\u0627 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u06cc\u0645 \u0648 \u0633\u067e\u0633 \u0631\u0634\u062a\u0647 \u0631\u0627 \u0628\u0647 \u067e\u06cc\u06a9\u0631\u0647 bag-of-words \u062a\u0628\u062f\u06cc\u0644 \u06a9\u0631\u062f\u06cc\u0645.  \u0633\u067e\u0633 \u0646\u0645\u0627\u06cc\u0634 \u06a9\u06cc\u0633\u0647 \u06a9\u0644\u0645\u0627\u062a \u0628\u0647 \u0631\u0648\u0634 <code>get_document_topics<\/code> \u0627\u0631\u0633\u0627\u0644 \u0645\u06cc \u0634\u0648\u062f.  \u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">[(0, 0.08422605), (1, 0.7446843), (2, 0.087012805), (3, 0.08407689)]\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0627\u062d\u062a\u0645\u0627\u0644 8.4% \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f \u06a9\u0647 \u0633\u0646\u062f \u062c\u062f\u06cc\u062f \u0645\u062a\u0639\u0644\u0642 \u0628\u0647 \u0645\u0628\u062d\u062b 1 \u0628\u0627\u0634\u062f (\u06a9\u0644\u0645\u0627\u062a \u0645\u0648\u0636\u0648\u0639 1 \u0631\u0627 \u062f\u0631 \u0622\u062e\u0631\u06cc\u0646 \u062e\u0631\u0648\u062c\u06cc \u0628\u0628\u06cc\u0646\u06cc\u062f).  \u0628\u0647 \u0647\u0645\u06cc\u0646 \u062a\u0631\u062a\u06cc\u0628\u060c 74.4 \u062f\u0631\u0635\u062f \u0627\u062d\u062a\u0645\u0627\u0644 \u062f\u0627\u0631\u062f \u06a9\u0647 \u0627\u06cc\u0646 \u0633\u0646\u062f \u0628\u0647 \u0645\u0648\u0636\u0648\u0639 \u062f\u0648\u0645 \u062a\u0639\u0644\u0642 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f.  
\u0627\u06af\u0631 \u0628\u0647 \u0645\u0628\u062d\u062b \u062f\u0648\u0645 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u0645\u060c \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0628\u0631\u062c \u0627\u06cc\u0641\u0644 \u0627\u0633\u062a.  \u0633\u0646\u062f \u0622\u0632\u0645\u0648\u0646 \u0645\u0627 \u0647\u0645\u0686\u0646\u06cc\u0646 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0627\u062a \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0633\u0627\u0632\u0647 \u0647\u0627 \u0648 \u0633\u0627\u062e\u062a\u0645\u0627\u0646 \u0647\u0627 \u0627\u0633\u062a.  \u0644\u0630\u0627 \u0645\u0628\u062d\u062b \u062f\u0648\u0645 \u0628\u0647 \u0622\u0646 \u0627\u062e\u062a\u0635\u0627\u0635 \u06cc\u0627\u0641\u062a\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0631\u0648\u0634 \u062f\u06cc\u06af\u0631 \u0628\u0631\u0627\u06cc \u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u0645\u062f\u0644 LDA \u0627\u0632 \u0637\u0631\u06cc\u0642 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"http:\/\/qpleple.com\/perplexity-to-evaluate-topic-models\/\">\u06af\u06cc\u062c\u06cc<\/a> \u0648 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/datascienceplus.com\/evaluation-of-topic-modeling-topic-coherence\/\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0627\u0646\u0633\u062c\u0627\u0645<\/a>.<\/p>\n<p>\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u0642\u0627\u0646\u0648\u0646 \u06a9\u0644\u06cc \u0628\u0631\u0627\u06cc \u06cc\u06a9 \u0645\u062f\u0644 LDA \u062e\u0648\u0628\u060c \u0627\u0645\u062a\u06cc\u0627\u0632 \u06af\u06cc\u062c\u06cc \u0628\u0627\u06cc\u062f \u06a9\u0645 \u0628\u0627\u0634\u062f \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0627\u0646\u0633\u062c\u0627\u0645 \u0628\u0627\u06cc\u062f \u0632\u06cc\u0627\u062f \u0628\u0627\u0634\u062f.  
\u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u062f\u0627\u0631\u0627\u06cc \u06cc\u06a9 <code>CoherenceModel<\/code> \u06a9\u0644\u0627\u0633 \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0631\u0627\u06cc \u06cc\u0627\u0641\u062a\u0646 \u0627\u0646\u0633\u062c\u0627\u0645 \u0645\u062f\u0644 LDA \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0634\u0648\u062f.  \u0628\u0631\u0627\u06cc \u06af\u06cc\u062c\u06cc\u060c \u0634\u06cc <code>LdaModel<\/code> \u0634\u0627\u0645\u0644 \u0631\u0648\u0634 <code>log_perplexity<\/code> \u0627\u0633\u062a \u06a9\u0647 \u06cc\u06a9 \u067e\u06cc\u06a9\u0631\u0647 \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u06cc \u06af\u06cc\u0631\u062f \u0648 \u06af\u06cc\u062c\u06cc \u0645\u0631\u0628\u0648\u0637\u0647 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">'\\nPerplexity:'<\/span>, lda_model.log_perplexity(gensim_corpus))\n\n<span class=\"hljs-keyword\">from<\/span> gensim.models <span class=\"hljs-keyword\">import<\/span> CoherenceModel\n\ncoherence_score_lda = CoherenceModel(model=lda_model, texts=processed_data, dictionary=gensim_dictionary, coherence=<span class=\"hljs-string\">'c_v'<\/span>)\ncoherence_score = coherence_score_lda.get_coherence()\n\n<span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">'\\nCoherence Score:'<\/span>, coherence_score)\n<\/code><\/pre>\n<p>\u06a9\u0644\u0627\u0633 <code>CoherenceModel<\/code> \u0645\u062f\u0644 LDA\u060c \u0645\u062a\u0646 \u0646\u0634\u0627\u0646\u0647 \u06af\u0630\u0627\u0631\u06cc \u0634\u062f\u0647\u060c \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a \u0648 \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 
\u0645\u06cc \u06af\u06cc\u0631\u062f.  \u0628\u0631\u0627\u06cc \u0628\u0647 \u062f\u0633\u062a \u0622\u0648\u0631\u062f\u0646 \u0646\u0645\u0631\u0647 \u0627\u0646\u0633\u062c\u0627\u0645\u060c <code>get_coherence<\/code> \u0631\u0648\u0634 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.  \u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">Perplexity: -7.492867099178969\n\nCoherence Score: 0.718387005948207\n<\/code><\/pre>\n<h3 id=\"visualizingthelda\"><span class=\"ez-toc-section\" id=\"%d8%aa%d8%ac%d8%b3%d9%85_lda\"><\/span>\u062a\u062c\u0633\u0645 LDA<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u0631\u0627\u06cc \u062a\u062c\u0633\u0645 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062e\u0648\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0622\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645 <code>pyLDAvis<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0627\u06cc \u06a9\u0647 \u062f\u0631 \u0627\u0628\u062a\u062f\u0627\u06cc \u0645\u0642\u0627\u0644\u0647 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0631\u062f\u06cc\u0645.  \u0627\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0634\u0627\u0645\u0644 \u06cc\u06a9 \u0645\u0627\u0698\u0648\u0644 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 Gensim LDA \u0627\u0633\u062a.  \u0627\u0628\u062a\u062f\u0627 \u0628\u0627\u06cc\u062f \u0628\u0627 \u0627\u0631\u0633\u0627\u0644 \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a\u060c \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0644\u0645\u0627\u062a \u0648 \u0645\u062f\u0644 LDA\u060c \u062a\u062c\u0633\u0645 \u0631\u0627 \u0622\u0645\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645. <code>prepare<\/code> \u0631\u0648\u0634.  
\u062f\u0631 \u0645\u0631\u062d\u0644\u0647 \u0628\u0639\u062f\u060c \u0628\u0627\u06cc\u062f \u0628\u0627 \u0622\u0646 \u062a\u0645\u0627\u0633 \u0628\u06af\u06cc\u0631\u06cc\u0645 <code>display<\/code> \u0631\u0648\u06cc  \u0631\u0627 <code>gensim<\/code> \u0645\u0627\u0698\u0648\u0644 \u0627\u0632 <code>pyLDAvis<\/code> \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u060c \u0645\u0637\u0627\u0628\u0642 \u0634\u06a9\u0644 \u0632\u06cc\u0631:<\/p>\n<pre><code class=\"hljs\">gensim_dictionary = gensim.corpora.Dictionary.load(<span class=\"hljs-string\">'gensim_dictionary.gensim'<\/span>)\ngensim_corpus = pickle.load(<span class=\"hljs-built_in\">open<\/span>(<span class=\"hljs-string\">'gensim_corpus_corpus.pkl'<\/span>, <span class=\"hljs-string\">'rb'<\/span>))\nlda_model = gensim.models.ldamodel.LdaModel.load(<span class=\"hljs-string\">'gensim_model.gensim'<\/span>)\n\n<span class=\"hljs-keyword\">import<\/span> pyLDAvis.gensim\n\nlda_visualization = pyLDAvis.gensim.prepare(lda_model, gensim_corpus, gensim_dictionary, sort_topics=<span class=\"hljs-literal\">False<\/span>)\npyLDAvis.display(lda_visualization)\n<\/code><\/pre>\n<p>\u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u062a\u0635\u0648\u06cc\u0631 \u0632\u06cc\u0631 \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f:<\/p>\n<p><img decoding=\"async\" class=\"img-responsive\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/python-nlp-working-with-gensim-library-part-2-1.png\" alt=\"\" title=\"\"><\/p>\n<p>\u0647\u0631 \u062f\u0627\u06cc\u0631\u0647 \u062f\u0631 \u062a\u0635\u0648\u06cc\u0631 \u0628\u0627\u0644\u0627 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u06cc\u06a9 \u0645\u0648\u0636\u0648\u0639 \u0627\u0633\u062a.  
\u0627\u0632 \u062e\u0631\u0648\u062c\u06cc \u0645\u062f\u0644 LDA \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 4 \u0645\u0628\u062d\u062b \u0645\u06cc \u062f\u0627\u0646\u06cc\u0645 \u06a9\u0647 \u0645\u0628\u062d\u062b \u0627\u0648\u0644 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u06af\u0631\u0645\u0627\u06cc\u0634 \u062c\u0647\u0627\u0646\u06cc\u060c \u0645\u0628\u062d\u062b \u062f\u0648\u0645 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0628\u0631\u062c \u0627\u06cc\u0641\u0644\u060c \u0645\u0628\u062d\u062b \u0633\u0648\u0645 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0645\u0648\u0646\u0627\u0644\u06cc\u0632\u0627 \u0648 \u0645\u0628\u062d\u062b \u0686\u0647\u0627\u0631\u0645 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc \u0627\u0633\u062a.<\/p>\n<p>\u0641\u0627\u0635\u0644\u0647 \u0628\u06cc\u0646 \u062f\u0627\u06cc\u0631\u0647 \u0647\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0645\u0648\u0636\u0648\u0639\u0627\u062a \u0686\u0642\u062f\u0631 \u0628\u0627 \u06cc\u06a9\u062f\u06cc\u06af\u0631 \u0645\u062a\u0641\u0627\u0648\u062a \u0647\u0633\u062a\u0646\u062f.  \u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u062f\u0627\u06cc\u0631\u0647 \u0647\u0627\u06cc 2 \u0648 3 \u0628\u0627 \u0647\u0645 \u0647\u0645\u067e\u0648\u0634\u0627\u0646\u06cc \u062f\u0627\u0631\u0646\u062f.  
\u0627\u06cc\u0646 \u0628\u0647 \u0627\u06cc\u0646 \u062f\u0644\u06cc\u0644 \u0627\u0633\u062a \u06a9\u0647 \u0645\u0628\u062d\u062b 2 (\u0628\u0631\u062c \u0627\u06cc\u0641\u0644) \u0648 \u0645\u0628\u062d\u062b 3 (\u0645\u0648\u0646\u0627\u0644\u06cc\u0632\u0627) \u06a9\u0644\u0645\u0627\u062a \u0645\u0634\u062a\u0631\u06a9 \u0632\u06cc\u0627\u062f\u06cc \u062f\u0627\u0631\u0646\u062f \u0645\u0627\u0646\u0646\u062f &#8220;\u0641\u0631\u0627\u0646\u0633\u0648\u06cc&#8221;\u060c &#8220;\u0641\u0631\u0627\u0646\u0633\u0647&#8221;\u060c &#8220;\u0645\u0648\u0632\u0647&#8221;\u060c &#8220;\u067e\u0627\u0631\u06cc\u0633&#8221; \u0648 \u063a\u06cc\u0631\u0647.<\/p>\n<p>\u0627\u06af\u0631 \u0645\u0627\u0648\u0633 \u0631\u0627 \u0631\u0648\u06cc \u0647\u0631 \u06a9\u0644\u0645\u0647 \u0627\u06cc \u062f\u0631 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u0646\u06af\u0647 \u062f\u0627\u0631\u06cc\u062f\u060c \u0641\u0642\u0637 \u062f\u0627\u06cc\u0631\u0647 \u0645\u0648\u0636\u0648\u0639\u06cc \u0631\u0627 \u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u06cc\u062f \u06a9\u0647 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0647 \u0627\u0633\u062a.  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u0627\u06af\u0631 \u0645\u0627\u0648\u0633 \u0631\u0627 \u0631\u0648\u06cc \u06a9\u0644\u0645\u0647 &#8220;\u0627\u0642\u0644\u06cc\u0645&#8221; \u0646\u06af\u0647 \u062f\u0627\u0631\u06cc\u062f\u060c \u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u06cc\u062f \u06a9\u0647 \u0645\u0628\u062d\u062b 2 \u0648 4 \u0646\u0627\u067e\u062f\u06cc\u062f \u0645\u06cc \u0634\u0648\u0646\u062f \u0632\u06cc\u0631\u0627 \u062d\u0627\u0648\u06cc \u06a9\u0644\u0645\u0647 \u0627\u0642\u0644\u06cc\u0645 \u0646\u06cc\u0633\u062a\u0646\u062f.  
\u062d\u062c\u0645 \u0645\u0628\u062d\u062b 1 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062e\u0648\u0627\u0647\u062f \u06cc\u0627\u0641\u062a \u0632\u06cc\u0631\u0627 \u0628\u06cc\u0634\u062a\u0631 \u0645\u0648\u0627\u0631\u062f \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u0644\u0645\u0647 &#8220;\u0627\u0642\u0644\u06cc\u0645&#8221; \u062f\u0631 \u0645\u0628\u062d\u062b \u0627\u0648\u0644 \u0627\u0633\u062a.  \u062f\u0631\u0635\u062f \u0628\u0633\u06cc\u0627\u0631 \u06a9\u0645\u06cc \u062f\u0631 \u0645\u0628\u062d\u062b 3 \u0627\u0633\u062a \u06a9\u0647 \u062f\u0631 \u062a\u0635\u0648\u06cc\u0631 \u0632\u06cc\u0631 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<p><img decoding=\"async\" class=\"img-responsive\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/python-nlp-working-with-gensim-library-part-2-2.png\" alt=\"\" title=\"\"><\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0645\u0634\u0627\u0628\u0647\u060c \u0627\u06af\u0631 \u0631\u0648\u06cc \u0647\u0631 \u06cc\u06a9 \u0627\u0632 \u062f\u0627\u06cc\u0631\u0647\u200c\u0647\u0627 \u06a9\u0644\u06cc\u06a9 \u06a9\u0646\u06cc\u062f\u060c \u0641\u0647\u0631\u0633\u062a\u06cc \u0627\u0632 \u0631\u0627\u06cc\u062c\u200c\u062a\u0631\u06cc\u0646 \u0627\u0635\u0637\u0644\u0627\u062d\u0627\u062a \u0628\u0631\u0627\u06cc \u0622\u0646 \u0645\u0648\u0636\u0648\u0639 \u062f\u0631 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u0638\u0627\u0647\u0631 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u0647\u0645\u0631\u0627\u0647 \u0628\u0627 \u0641\u0631\u0627\u0648\u0627\u0646\u06cc \u0648\u0642\u0648\u0639 \u062f\u0631 \u0647\u0645\u06cc\u0646 \u0645\u0648\u0636\u0648\u0639.  
\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u0627\u06af\u0631 \u0645\u0627\u0648\u0633 \u0631\u0627 \u0631\u0648\u06cc \u062f\u0627\u06cc\u0631\u0647 2 \u06a9\u0647 \u0645\u0631\u0628\u0648\u0637 \u0628\u0647 \u0645\u0648\u0636\u0648\u0639 &#8220;\u0628\u0631\u062c \u0627\u06cc\u0641\u0644&#8221; \u0627\u0633\u062a \u0642\u0631\u0627\u0631 \u062f\u0647\u06cc\u062f\u060c \u0646\u062a\u0627\u06cc\u062c \u0632\u06cc\u0631 \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f:<\/p>\n<p><img decoding=\"async\" class=\"img-responsive\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/python-nlp-working-with-gensim-library-part-2-3.png\" alt=\"\" title=\"\"><\/p>\n<p>\u0627\u0632 \u062e\u0631\u0648\u062c\u06cc \u0645\u06cc \u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u062f\u0627\u06cc\u0631\u0647 \u0645\u0648\u0636\u0648\u0639 \u062f\u0648\u0645 \u06cc\u0639\u0646\u06cc \u00ab\u0628\u0631\u062c \u0627\u06cc\u0641\u0644\u00bb \u0627\u0646\u062a\u062e\u0627\u0628 \u0634\u062f\u0647 \u0627\u0633\u062a.  \u0627\u0632 \u0644\u06cc\u0633\u062a \u0633\u0645\u062a \u0631\u0627\u0633\u062a\u060c \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0631\u0627\u06cc\u062c \u062a\u0631\u06cc\u0646 \u0627\u0635\u0637\u0644\u0627\u062d\u0627\u062a \u0628\u0631\u0627\u06cc \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u0628\u0628\u06cc\u0646\u06cc\u062f.  \u0627\u0635\u0637\u0644\u0627\u062d &#8220;\u0627\u06cc\u0641\u0644&#8221; \u062f\u0631 \u0628\u0627\u0644\u0627\u06cc \u0641\u0647\u0631\u0633\u062a \u0627\u0633\u062a.  
\u0647\u0645\u0686\u0646\u06cc\u0646\u060c \u0628\u062f\u06cc\u0647\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0627\u0635\u0637\u0644\u0627\u062d \u00ab\u0627\u06cc\u0641\u0644\u00bb \u0628\u06cc\u0634\u062a\u0631 \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0648\u0636\u0648\u0639 \u0648\u062c\u0648\u062f \u062f\u0627\u0634\u062a\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u0627\u0632 \u0637\u0631\u0641 \u062f\u06cc\u06af\u0631\u060c \u0627\u06af\u0631 \u0628\u0647 \u0627\u0635\u0637\u0644\u0627\u062d &#8220;\u0641\u0631\u0627\u0646\u0633\u0648\u06cc&#8221; \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0628\u0647 \u0648\u0636\u0648\u062d \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u062f \u06a9\u0647 \u062d\u062f\u0648\u062f \u0646\u06cc\u0645\u06cc \u0627\u0632 \u0631\u062e\u062f\u0627\u062f\u0647\u0627\u06cc \u0627\u06cc\u0646 \u0627\u0635\u0637\u0644\u0627\u062d \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0648\u0636\u0648\u0639 \u0627\u0633\u062a.  \u0627\u06cc\u0646 \u0628\u0647 \u0627\u06cc\u0646 \u062f\u0644\u06cc\u0644 \u0627\u0633\u062a \u06a9\u0647 \u0645\u0628\u062d\u062b 3\u060c \u06cc\u0639\u0646\u06cc &#8220;\u0645\u0648\u0646\u0627\u0644\u06cc\u0632\u0627&#8221; \u0646\u06cc\u0632 \u0686\u0646\u062f\u06cc\u0646 \u0628\u0627\u0631 \u0639\u0628\u0627\u0631\u062a &#8220;\u0641\u0631\u0627\u0646\u0633\u0648\u06cc&#8221; \u0631\u0627 \u062f\u0631 \u062e\u0648\u062f \u062f\u0627\u0631\u062f.  
\u0628\u0631\u0627\u06cc \u062a\u0623\u06cc\u06cc\u062f \u0627\u06cc\u0646 \u0645\u0648\u0631\u062f\u060c \u0631\u0648\u06cc \u062f\u0627\u06cc\u0631\u0647 \u0645\u0628\u062d\u062b 3 \u06a9\u0644\u06cc\u06a9 \u06a9\u0646\u06cc\u062f \u0648 \u0645\u0627\u0648\u0633 \u0631\u0627 \u0631\u0648\u06cc \u0639\u0628\u0627\u0631\u062a \u00ab\u0641\u0631\u0627\u0646\u0633\u0648\u06cc\u00bb \u0646\u06af\u0647 \u062f\u0627\u0631\u06cc\u062f.<\/p>\n<h2 id=\"topicmodelingvialsi\"><span class=\"ez-toc-section\" id=\"%d9%85%d8%af%d9%84_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d9%88%d8%b6%d9%88%d8%b9_%d8%a7%d8%b2_%d8%b7%d8%b1%db%8c%d9%82_lsi\"><\/span>\u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 \u0637\u0631\u06cc\u0642 LSI<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u0642\u0633\u0645\u062a \u0642\u0628\u0644 \u0631\u0648\u0634 \u0627\u0646\u062c\u0627\u0645 \u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0627\u0632 \u0637\u0631\u06cc\u0642 LDA \u0631\u0627 \u062f\u06cc\u062f\u06cc\u0645.  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0628\u0628\u06cc\u0646\u06cc\u0645 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0646\u0645\u0627\u06cc\u0647\u200c\u0633\u0627\u0632\u06cc \u0645\u0639\u0646\u0627\u06cc\u06cc \u067e\u0646\u0647\u0627\u0646 (LSI) \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u0645.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0627\u06cc\u0646 \u06a9\u0627\u0631\u060c \u062a\u0646\u0647\u0627 \u06a9\u0627\u0631\u06cc \u06a9\u0647 \u0628\u0627\u06cc\u062f \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0627\u0632 \u06a9\u0644\u0627\u0633 <code>LsiModel<\/code> \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f. \u0628\u0642\u06cc\u0647 \u0641\u0631\u0622\u06cc\u0646\u062f \u06a9\u0627\u0645\u0644\u0627\u064b \u0645\u0634\u0627\u0628\u0647 \u0686\u06cc\u0632\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0642\u0628\u0644\u0627\u064b \u0628\u0627 LDA \u062f\u0646\u0628\u0627\u0644 \u06a9\u0631\u062f\u06cc\u0645.<\/p>\n<p>\u0628\u0647 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0632\u06cc\u0631 \u0646\u06af\u0627\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> gensim.models <span class=\"hljs-keyword\">import<\/span> LsiModel\n\nlsi_model = LsiModel(gensim_corpus, num_topics=<span class=\"hljs-number\">4<\/span>, id2word=gensim_dictionary)\ntopics = lsi_model.print_topics(num_words=<span class=\"hljs-number\">10<\/span>)\n<span class=\"hljs-keyword\">for<\/span> topic <span class=\"hljs-keyword\">in<\/span> topics:\n    <span class=\"hljs-built_in\">print<\/span>(topic)\n<\/code><\/pre>\n<p>\u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u0634\u06a9\u0644 \u0632\u06cc\u0631 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">(0, 
'-0.337*\"intelligence\" + -0.297*\"machine\" + -0.250*\"artificial\" + -0.240*\"problem\" + -0.208*\"system\" + -0.200*\"learning\" + -0.166*\"network\" + -0.161*\"climate\" + -0.159*\"research\" + -0.153*\"change\"')\n\n(1, '-0.453*\"climate\" + -0.377*\"change\" + -0.344*\"warming\" + -0.326*\"global\" + -0.196*\"emission\" + -0.177*\"greenhouse\" + -0.168*\"effect\" + 0.162*\"intelligence\" + -0.158*\"temperature\" + 0.143*\"machine\"')\n\n(2, '0.688*\"painting\" + 0.346*\"leonardo\" + 0.179*\"louvre\" + 0.175*\"eiffel\" + 0.170*\"portrait\" + 0.147*\"french\" + 0.127*\"museum\" + 0.117*\"century\" + 0.109*\"original\" + 0.092*\"giocondo\"')\n\n(3, '-0.656*\"eiffel\" + 0.259*\"painting\" + -0.184*\"second\" + -0.145*\"exposition\" + -0.145*\"structure\" + 0.135*\"leonardo\" + -0.128*\"tallest\" + -0.116*\"engineer\" + -0.112*\"french\" + -0.107*\"design\"')\n<\/code><\/pre>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0645\u062f\u0644 \u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u06cc\u06a9 \u06a9\u0627\u0631 \u0645\u0647\u0645 NLP \u0627\u0633\u062a.  \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627 \u0648 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f \u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0627\u0632 \u0622\u0646\u0647\u0627 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f.  
\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0631\u0648\u0634 \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0645\u0648\u0636\u0648\u0639 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc LDA \u0648 LSI \u062f\u06cc\u062f\u06cc\u0645.  \u0645\u0627 \u0647\u0645\u0686\u0646\u06cc\u0646 \u062f\u06cc\u062f\u06cc\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0646\u062a\u0627\u06cc\u062c \u0645\u062f\u0644 LDA \u062e\u0648\u062f \u0631\u0627 \u062a\u062c\u0633\u0645 \u06a9\u0646\u06cc\u0645.<\/p>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=();t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)(0);\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 \u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-22 22:06:04<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    
data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;16267&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc NLP: \u06a9\u0627\u0631 \u0628\u0627 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim (\u0628\u062e\u0634 2)&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 
5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 10<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u0627\u06cc\u0646 \u06cc\u0627\u0632\u062f\u0647\u0645\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0645\u0646 \u0627\u0632 \u0633\u0631\u06cc \u0645\u0642\u0627\u0644\u0627\u062a \u0627\u0633\u062a \u0631\u0648\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 
\u0628\u0631\u0627\u06cc NLP \u0648 \u0645\u0642\u0627\u0644\u0647 \u062f\u0648\u0645 \u0631\u0648\u06cc \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u062f\u0631 \u0627\u06cc\u0646 \u0645\u062c\u0645\u0648\u0639\u0647. \u062f\u0631 \u0645\u0642\u0627\u0644\u0647 \u0642\u0628\u0644\u06cc\u060c \u0645\u0639\u0631\u0641\u06cc \u0645\u062e\u062a\u0635\u0631\u06cc \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Gensim \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0631\u0627\u0626\u0647 \u062f\u0627\u062f\u0645. \u0645\u0646 \u062a\u0648\u0636\u06cc\u062d \u062f\u0627\u062f\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0641\u0631\u0647\u0646\u06af\u200c\u0647\u0627\u06cc \u0644\u063a\u062a \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u0628\u0647 \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc \u0645\u062a\u0646\u0627\u0638\u0631\u0634\u0627\u0646 \u0646\u06af\u0627\u0634\u062a \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f. 
\u0645\u0627 \u0628\u06cc\u0634\u062a\u0631 \u062f\u0631 \u0645\u0648\u0631\u062f [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":16268,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620,1686],"tags":[],"class_list":["post-16267","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming","category-ai"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16267","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=16267"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16267\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/16268"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=16267"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=16267"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=16267"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}