{"id":14745,"date":"2024-01-06T05:24:15","date_gmt":"2024-01-06T01:54:15","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/"},"modified":"2024-01-06T05:24:15","modified_gmt":"2024-01-06T01:54:15","slug":"%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/","title":{"rendered":"\u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 5 \u062e\u0637\u06cc \u0628\u0647 \u0633\u0628\u06a9 GPT \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0627 TensorFlow\/Keras"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a7%d9%86%d9%88%d8%a7%d8%b9_llm_%d9%88_gpt-fyodor\" >\u0627\u0646\u0648\u0627\u0639 LLM \u0648 GPT-Fyodor<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#kerasnlp\" >KerasNLP<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d9%be%db%8c%d8%a7%d8%af%d9%87_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d8%af%d9%84_gpt-style_%d8%a8%d8%a7_keras\" >\u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0645\u062f\u0644 GPT-Style \u0628\u0627 Keras<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%af%d8%b1_%d8%ad%d8%a7%d9%84_%d8%a8%d8%a7%d8%b1%da%af%db%8c%d8%b1%db%8c_%d8%af%d8%a7%d8%af%d9%87_%d9%87%d8%a7\" >\u062f\u0631 \u062d\u0627\u0644 \u0628\u0627\u0631\u06af\u06cc\u0631\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a8%d8%b1%d8%af%d8%a7%d8%b1_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d8%aa%d9%86\" >\u0628\u0631\u062f\u0627\u0631 \u0633\u0627\u0632\u06cc \u0645\u062a\u0646<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%d8%af%d8%a7%d8%af%d9%87\" >\u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%aa%d8%b9%d8%b1%db%8c%d9%81_%d9%85%d8%af%d9%84\" >\u062a\u0639\u0631\u06cc\u0641 \u0645\u062f\u0644<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d9%be%d8%a7%d8%b3%d8%ae_%d8%a8%d9%87_%d8%aa%d9%85%d8%a7%d8%b3_%d8%b3%d9%81%d8%a7%d8%b1%d8%b4%db%8c\" >\u067e\u0627\u0633\u062e \u0628\u0647 \u062a\u0645\u0627\u0633 \u0633\u0641\u0627\u0631\u0634\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a2%d9%85%d9%88%d8%b2%d8%b4_%d9%85%d8%af%d9%84\" >\u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-10\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a7%d8%b3%d8%aa%d9%86%d8%aa%d8%a7%d8%ac_%d9%85%d8%af%d9%84\" >\u0627\u0633\u062a\u0646\u062a\u0627\u062c \u0645\u062f\u0644<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d8%a8%d9%87%d8%a8%d9%88%d8%af_%d9%86%d8%aa%d8%a7%db%8c%d8%ac%d8%9f\" >\u0628\u0647\u0628\u0648\u062f \u0646\u062a\u0627\u06cc\u062c\u061f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/rasanegaar.com\/blog\/%d8%aa%d9%88%d9%84%db%8c%d8%af-%d9%85%d8%aa%d9%86-5-%d8%ae%d8%b7%db%8c-%d8%a8%d9%87-%d8%b3%d8%a8%da%a9-gpt-%d8%af%d8%b1-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86-%d8%a8%d8%a7-tensorflow-keras\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 12<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<p>\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0631\u0632\u060c \u062d\u062a\u06cc \u0628\u0627 \u0648\u062c\u0648\u062f \u0627\u06cc\u0646\u06a9\u0647 \u062f\u0631 \u0633\u0627\u0644 2017 \u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u060c \u062a\u0646\u0647\u0627 \u062f\u0631 \u0686\u0646\u062f \u0633\u0627\u0644 
\u0627\u062e\u06cc\u0631 \u0634\u0631\u0648\u0639 \u0628\u0647 \u062c\u0630\u0628 \u0642\u0627\u0628\u0644 \u062a\u0648\u062c\u0647\u06cc \u06a9\u0631\u062f\u0647 \u0627\u0633\u062a.  \u0628\u0627 \u06af\u0633\u062a\u0631\u0634 \u0641\u0646\u0627\u0648\u0631\u06cc \u0627\u0632 \u0637\u0631\u06cc\u0642 \u067e\u0644\u062a\u0641\u0631\u0645 \u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f HuggingFace\u060c NLP \u0648 <em>\u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u0632\u0628\u0627\u0646 \u0628\u0632\u0631\u06af (LLM)<\/em> \u062f\u0631 \u062f\u0633\u062a\u0631\u0633 \u062a\u0631 \u0627\u0632 \u0647\u0645\u06cc\u0634\u0647 \u0634\u062f\u0647 \u0627\u0646\u062f.<\/p>\n<p>\u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644 &#8211; \u062d\u062a\u06cc \u0628\u0627 \u0647\u0645\u0647 \u062a\u0628\u0644\u06cc\u063a\u0627\u062a \u0627\u0637\u0631\u0627\u0641 \u0622\u0646\u0647\u0627 \u0648 \u0628\u0627 <em>\u0632\u06cc\u0627\u062f<\/em> \u0631\u0627\u0647\u0646\u0645\u0627\u0647\u0627\u06cc \u062a\u0626\u0648\u0631\u06cc \u06af\u0631\u0627\u060c \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0647\u0627\u06cc \u0633\u0641\u0627\u0631\u0634\u06cc \u0632\u06cc\u0627\u062f\u06cc \u0628\u0647 \u0635\u0648\u0631\u062a \u0622\u0646\u0644\u0627\u06cc\u0646 \u0648\u062c\u0648\u062f \u0646\u062f\u0627\u0631\u062f\u060c \u0648 \u0645\u0646\u0627\u0628\u0639 \u0628\u0647 \u0622\u0633\u0627\u0646\u06cc \u0645\u0627\u0646\u0646\u062f \u0628\u0631\u062e\u06cc \u0627\u0632 \u0627\u0646\u0648\u0627\u0639 \u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u062f\u06cc\u06af\u0631 \u06a9\u0647 \u0645\u062f\u062a \u0637\u0648\u0644\u0627\u0646\u06cc \u062a\u0631\u06cc \u0648\u062c\u0648\u062f \u062f\u0627\u0634\u062a\u0647 \u0627\u0646\u062f\u060c \u062f\u0631 \u062f\u0633\u062a\u0631\u0633 \u0646\u06cc\u0633\u062a\u0646\u062f.  
\u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0686\u0631\u062e\u0647 \u06a9\u0627\u0631\u06cc \u062e\u0648\u062f \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06cc\u06a9 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0632 \u067e\u06cc\u0634 \u0633\u0627\u062e\u062a\u0647 \u0634\u062f\u0647 \u0627\u0632 HuggingFace (\u0645\u0648\u0636\u0648\u0639 \u0631\u0627\u0647\u0646\u0645\u0627\u06cc \u062f\u06cc\u06af\u0631\u06cc) \u0633\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f &#8211; \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0647 <em>\u0627\u062d\u0633\u0627\u0633 \u06a9\u0646\u06cc\u062f<\/em> \u0686\u06af\u0648\u0646\u0647 \u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u062f \u0628\u0627 \u0633\u0627\u062e\u062a\u0646 \u06cc\u06a9\u06cc \u0627\u0632 \u062e\u0648\u062f\u062a\u0627\u0646\u060c \u0642\u0628\u0644 \u0627\u0632 \u0627\u0646\u062a\u0632\u0627\u0639 \u0622\u0646 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u06cc\u06a9 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647.  \u0645\u0627 \u062a\u0645\u0631\u06a9\u0632 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f \u0631\u0648\u06cc \u0633\u0627\u062e\u062a\u0645\u0627\u0646\u060c \u0628\u0647 \u062c\u0627\u06cc \u062a\u0626\u0648\u0631\u06cc \u0648 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u06cc \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627.<\/p>\n<blockquote>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u060c \u0645\u0627 \u062f\u0631 \u062d\u0627\u0644 \u0633\u0627\u062e\u062a\u0646 \u06cc\u06a9 <strong>\u0645\u062f\u0644 \u0632\u0628\u0627\u0646 \u062e\u0648\u062f\u0631\u06af\u0631\u0633\u06cc\u0648\u0646<\/strong> \u0628\u0647 <strong>\u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646<\/strong>.  
\u0645\u0627 \u062a\u0645\u0631\u06a9\u0632 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f \u0631\u0648\u06cc \u062c\u0646\u0628\u0647 \u0647\u0627\u06cc \u0639\u0645\u0644\u06cc \u0648 \u062d\u062f\u0627\u0642\u0644\u06cc \/ \u0645\u062e\u062a\u0635\u0631 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u060c \u062a\u0642\u0633\u06cc\u0645 \u0622\u0646\u060c \u0628\u0631\u062f\u0627\u0631 \u06a9\u0631\u062f\u0646 \u0622\u0646\u060c \u0633\u0627\u062e\u062a \u06cc\u06a9 \u0645\u062f\u0644\u060c \u0646\u0648\u0634\u062a\u0646 \u06cc\u06a9 \u062a\u0645\u0627\u0633 \u0633\u0641\u0627\u0631\u0634\u06cc \u0648 \u0622\u0645\u0648\u0632\u0634\/\u0627\u0633\u062a\u0646\u062a\u0627\u062c.  \u0647\u0631 \u06cc\u06a9 \u0627\u0632 \u0627\u06cc\u0646 \u0648\u0638\u0627\u06cc\u0641 \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0647 \u0631\u0627\u0647\u0646\u0645\u0627\u0647\u0627\u06cc \u062f\u0642\u06cc\u0642 \u062a\u0631\u06cc \u062a\u0642\u0633\u06cc\u0645 \u06a9\u0631\u062f\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0631\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u0639\u0645\u0648\u0645\u06cc \u0646\u06af\u0647 \u0645\u06cc \u062f\u0627\u0631\u06cc\u0645 \u0648 \u0628\u0633\u062a\u0647 \u0628\u0647 \u0622\u0646\u060c \u0641\u0636\u0627\u06cc\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u0633\u0641\u0627\u0631\u0634\u06cc \u0633\u0627\u0632\u06cc \u0648 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u06cc \u0628\u0627\u0642\u06cc \u0645\u06cc \u06af\u0630\u0627\u0631\u06cc\u0645. 
\u0631\u0648\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062e\u0648\u062f\u062a\u0627\u0646<\/p>\n<\/blockquote>\n<h2 id=\"typesofllmsandgptfyodor\"><span class=\"ez-toc-section\" id=\"%d8%a7%d9%86%d9%88%d8%a7%d8%b9_llm_%d9%88_gpt-fyodor\"><\/span>\u0627\u0646\u0648\u0627\u0639 LLM \u0648 GPT-Fyodor<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0637\u0628\u0642\u0647 \u0628\u0646\u062f\u06cc \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0633\u06cc\u0627\u0631 \u067e\u06cc\u0686\u06cc\u062f\u0647 \u062a\u0631 \u0634\u0648\u062f &#8211; \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f <em>\u0628\u0647 \u0637\u0648\u0631 \u06af\u0633\u062a\u0631\u062f\u0647<\/em> \u0645\u062f\u0644 \u0647\u0627\u06cc \u0632\u0628\u0627\u0646 \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 Transformer \u0631\u0627 \u0628\u0647 \u0633\u0647 \u062f\u0633\u062a\u0647 \u062f\u0633\u062a\u0647 \u0628\u0646\u062f\u06cc \u06a9\u0646\u06cc\u062f:<\/p>\n<ul>\n<li><strong>\u0645\u062f\u0644 \u0647\u0627\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0631\u0645\u0632\u06af\u0630\u0627\u0631<\/strong> &#8211; \u0622\u0644\u0628\u0631\u062a\u060c \u0628\u0631\u062a\u060c \u062f\u06cc\u0633\u062a\u06cc\u0644\u0628\u0631\u062a\u060c \u0631\u0648\u0628\u0631\u062a\u0627<\/li>\n<li><strong>\u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0631\u0645\u0632\u06af\u0634\u0627<\/strong> &#8211; GPT\u060c GPT-2\u060c GPT-3\u060c TransformerXL<\/li>\n<li><strong>\u0645\u062f\u0644 \u0647\u0627\u06cc Seq2Seq<\/strong> &#8211; BART\u060c mBART\u060c T5<\/li>\n<\/ul>\n<p><em>\u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0631\u0645\u0632\u06af\u0630\u0627\u0631<\/em> \u0645\u062f\u0644\u200c\u0647\u0627 \u0641\u0642\u0637 \u0627\u0632 \u0631\u0645\u0632\u06af\u0630\u0627\u0631 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u062f\u0631 \u0645\u0639\u0645\u0627\u0631\u06cc 
\u062e\u0648\u062f \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f (\u0645\u0639\u0645\u0648\u0644\u0627\u064b \u067e\u0634\u062a\u0647\u200c\u0627\u06cc) \u0648 \u0628\u0631\u0627\u06cc \u062f\u0631\u06a9 \u062c\u0645\u0644\u0627\u062a (\u0637\u0628\u0642\u0647\u200c\u0628\u0646\u062f\u06cc\u060c \u0634\u0646\u0627\u0633\u0627\u06cc\u06cc \u0645\u0648\u062c\u0648\u062f\u06cc\u062a \u0646\u0627\u0645\u200c\u06af\u0630\u0627\u0631\u06cc \u0634\u062f\u0647\u060c \u067e\u0627\u0633\u062e \u0628\u0647 \u0633\u0624\u0627\u0644) \u0639\u0627\u0644\u06cc \u0647\u0633\u062a\u0646\u062f.<\/p>\n<p><em>\u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0631\u0645\u0632\u06af\u0634\u0627<\/em> \u0645\u062f\u0644\u200c\u0647\u0627 \u0641\u0642\u0637 \u0627\u0632 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u062f\u0631 \u0645\u0639\u0645\u0627\u0631\u06cc \u062e\u0648\u062f \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f (\u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u0627\u0646\u0628\u0627\u0634\u062a\u0647 \u0634\u062f\u0647) \u0648 \u0628\u0631\u0627\u06cc \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u0622\u06cc\u0646\u062f\u0647 \u0639\u0627\u0644\u06cc \u0647\u0633\u062a\u0646\u062f\u060c \u06a9\u0647 \u0622\u0646\u0647\u0627 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0645\u0646\u0627\u0633\u0628 \u0645\u06cc\u200c\u06a9\u0646\u062f.<\/p>\n<p><em>Seq2Seq<\/em> \u0645\u062f\u0644\u200c\u0647\u0627 \u0647\u0645 \u0631\u0645\u0632\u06af\u0630\u0627\u0631 \u0648 \u0647\u0645 \u0631\u0645\u0632\u06af\u0634\u0627 \u0631\u0627 \u062a\u0631\u06a9\u06cc\u0628 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f \u0648 \u062f\u0631 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646\u060c \u062e\u0644\u0627\u0635\u0647\u200c\u0633\u0627\u0632\u06cc \u0648 
\u0645\u0647\u0645\u200c\u062a\u0631 \u0627\u0632 \u0647\u0645\u0647 &#8211; \u062a\u0631\u062c\u0645\u0647 \u0639\u0627\u0644\u06cc \u0647\u0633\u062a\u0646\u062f.<\/p>\n<p>\u062e\u0627\u0646\u0648\u0627\u062f\u0647 \u0645\u062f\u0644\u200c\u0647\u0627\u06cc GPT \u06a9\u0647 \u062f\u0631 \u0686\u0646\u062f \u0633\u0627\u0644 \u06af\u0630\u0634\u062a\u0647 \u0645\u062d\u0628\u0648\u0628\u06cc\u062a \u0632\u06cc\u0627\u062f\u06cc \u0628\u0647 \u062f\u0633\u062a \u0622\u0648\u0631\u062f\u0647\u200c\u0627\u0646\u062f\u060c \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0631\u0645\u0632\u06af\u0634\u0627 \u0647\u0633\u062a\u0646\u062f \u0648 \u062f\u0631 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646\u200c\u0647\u0627\u06cc \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647 \u0634\u0628\u06cc\u0647 \u0627\u0646\u0633\u0627\u0646 \u0639\u0627\u0644\u06cc \u0647\u0633\u062a\u0646\u062f. \u0631\u0648\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u06cc \u0627\u0632 \u062f\u0627\u062f\u0647 \u0647\u0627\u060c \u0648 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u0628\u0630\u0631 \u0634\u0631\u0648\u0639 \u062c\u062f\u06cc\u062f \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f\u060c \u062f\u0631\u062e\u0648\u0627\u0633\u062a \u062f\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.  
\u0628\u0631\u0627\u06cc \u0645\u062b\u0627\u0644:<\/p>\n<pre><code class=\"hljs\">generate_text(<span class=\"hljs-string\">'the truth ultimately is'<\/span>)\n<\/code><\/pre>\n<p>\u06a9\u0647 \u062f\u0631 \u0632\u06cc\u0631 \u0647\u0648\u062f \u0627\u06cc\u0646 \u062f\u0633\u062a\u0648\u0631 \u0631\u0627 \u0628\u0647 \u06cc\u06a9 \u0645\u062f\u0644 GPT \u0645\u0627\u0646\u0646\u062f \u0645\u06cc\u200c\u0631\u0633\u0627\u0646\u062f \u0648 \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc\u200c\u06a9\u0646\u062f:<\/p>\n<pre><code class=\"hljs\">'the truth ultimately is really a joy in history, this state of life through which is almost invisible, superfluous  teleological...'\n<\/code><\/pre>\n<p>\u0627\u06cc\u0646 \u062f\u0631 \u0648\u0627\u0642\u0639 \u06cc\u06a9 \u0627\u0633\u067e\u0648\u06cc\u0644\u0631 \u06a9\u0648\u0686\u06a9 \u0627\u0632 \u0627\u0646\u062a\u0647\u0627\u06cc \u0631\u0627\u0647\u0646\u0645\u0627 \u0627\u0633\u062a!  \u06cc\u06a9 \u0627\u0633\u067e\u0648\u06cc\u0644\u0631 \u06a9\u0648\u0686\u06a9 \u062f\u06cc\u06af\u0631 \u0645\u0639\u0645\u0627\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0622\u0646 \u0645\u062a\u0646 \u0631\u0627 \u062a\u0648\u0644\u06cc\u062f \u06a9\u0631\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">inputs = layers.Input(shape=(maxlen,))\nembedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\ntransformer_block = keras_nlp.layers.TransformerDecoder(embed_dim, num_heads)(embedding_layer)\noutputs = layers.Dense(vocab_size, activation=<span class=\"hljs-string\">'softmax'<\/span>)(transformer_block)\n    \nmodel = keras.Model(inputs=inputs, outputs=outputs)\n<\/code><\/pre>\n<p>5 \u062e\u0637 \u062a\u0645\u0627\u0645 \u0686\u06cc\u0632\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u06cc\u06a9 \u0645\u062f\u0644 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0641\u0642\u0637 
\u0631\u0645\u0632\u06af\u0634\u0627 \u0644\u0627\u0632\u0645 \u0627\u0633\u062a &#8211; \u0634\u0628\u06cc\u0647 \u0633\u0627\u0632\u06cc \u06cc\u06a9 GPT \u06a9\u0648\u0686\u06a9.  \u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 \u0645\u0627 \u0645\u062f\u0644 \u0631\u0627 \u0622\u0645\u0648\u0632\u0634 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u0627\u062f \u0631\u0648\u06cc \u0631\u0645\u0627\u0646\u200c\u0647\u0627\u06cc \u0641\u0626\u0648\u062f\u0648\u0631 \u062f\u0627\u0633\u062a\u0627\u06cc\u0648\u0641\u0633\u06a9\u06cc (\u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646\u200c\u0647\u0627 \u0631\u0627 \u0628\u0627 \u0647\u0631 \u0686\u06cc\u0632 \u062f\u06cc\u06af\u0631\u06cc \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u06a9\u0646\u06cc\u062f\u060c \u0627\u0632 \u0648\u06cc\u06a9\u06cc\u200c\u067e\u062f\u06cc\u0627 \u06af\u0631\u0641\u062a\u0647 \u062a\u0627 \u0646\u0638\u0631\u0627\u062a \u0631\u062f\u06cc\u062a) &#8211; \u0645\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0622\u0632\u0645\u0627\u06cc\u0634\u06cc \u0645\u062f\u0644 \u0631\u0627 \u0645\u06cc\u200c\u0646\u0627\u0645\u06cc\u0645. 
<strong>GPT-Fyodor<\/strong>.<\/p>\n<h2 id=\"kerasnlp\"><span class=\"ez-toc-section\" id=\"kerasnlp\"><\/span>KerasNLP<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062a\u0631\u0641\u0646\u062f \u06cc\u06a9 GPT-Fyodor 5 \u062e\u0637\u06cc \u062f\u0631 \u0627\u06cc\u0646 \u0627\u0633\u062a <a rel=\"nofollow noopener noreferrer\" target=\"_blank\" href=\"https:\/\/github.com\/keras-team\/keras-nlp\"><em>KerasNLP<\/em><\/a>\u060c \u06a9\u0647 \u062a\u0648\u0633\u0637 \u062a\u06cc\u0645 \u0631\u0633\u0645\u06cc Keras \u062a\u0648\u0633\u0639\u0647 \u06cc\u0627\u0641\u062a\u0647 \u0627\u0633\u062a\u060c \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u062a\u0648\u0633\u0639\u0647 \u0627\u0641\u0642\u06cc \u0628\u0631\u0627\u06cc Keras\u060c \u06a9\u0647 \u0628\u0647 \u0631\u0648\u0634 \u0648\u0627\u0642\u0639\u06cc Keras\u060c \u0628\u0627 \u0647\u062f\u0641 \u0631\u0633\u0627\u0646\u062f\u0646 NLP \u0628\u0627 \u0642\u062f\u0631\u062a \u0635\u0646\u0639\u062a\u06cc \u0628\u0647 \u0646\u0648\u06a9 \u0627\u0646\u06af\u0634\u062a\u0627\u0646 \u0634\u0645\u0627\u060c \u0628\u0627 \u0644\u0627\u06cc\u0647 \u0647\u0627\u06cc \u062c\u062f\u06cc\u062f (\u0631\u0645\u0632\u06af\u0630\u0627\u0631\u060c \u0631\u0645\u0632\u06af\u0634\u0627\u060c \u062a\u0639\u0628\u06cc\u0647 \u062a\u0648\u06a9\u0646\u060c \u062c\u0627\u0633\u0627\u0632\u06cc \u0645\u0648\u0642\u0639\u06cc\u062a\u060c \u0645\u0639\u06cc\u0627\u0631\u0647\u0627\u060c \u062a\u0648\u06a9\u0646\u0627\u06cc\u0632\u0631\u0647\u0627 \u0648 \u063a\u06cc\u0631\u0647).<\/p>\n<p><strong>KerasNLP \u06cc\u06a9 \u0628\u0627\u063a \u0648\u062d\u0634 \u0646\u0645\u0648\u0646\u0647 \u0646\u06cc\u0633\u062a<\/strong>.  
\u0627\u06cc\u0646 \u0628\u062e\u0634\u06cc \u0627\u0632 Keras (\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u0628\u0633\u062a\u0647 \u062c\u062f\u0627\u06af\u0627\u0646\u0647) \u0627\u0633\u062a \u06a9\u0647 \u0645\u0627\u0646\u0639 \u0648\u0631\u0648\u062f \u0628\u0631\u0627\u06cc \u062a\u0648\u0633\u0639\u0647 \u0645\u062f\u0644 NLP \u0631\u0627 \u06a9\u0627\u0647\u0634 \u0645\u06cc \u062f\u0647\u062f\u060c \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u0645\u0627\u0646\u0639 \u0648\u0631\u0648\u062f \u0628\u0631\u0627\u06cc \u062a\u0648\u0633\u0639\u0647 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0639\u0645\u0648\u0645\u06cc \u0628\u0627 \u0628\u0633\u062a\u0647 \u0627\u0635\u0644\u06cc \u0631\u0627 \u06a9\u0627\u0647\u0634 \u0645\u06cc \u062f\u0647\u062f.<\/p>\n<div class=\"alert alert-note\">\n<div class=\"flex\">\n<div class=\"flex-shrink-0 mr-3\"><\/div>\n<div class=\"w-full\">\n<p><strong>\u062a\u0648\u062c\u0647 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f:<\/strong> \u062a\u0627 \u0632\u0645\u0627\u0646 \u0646\u06af\u0627\u0631\u0634 KerasNLP \u0647\u0646\u0648\u0632 \u062f\u0631 \u062d\u0627\u0644 \u062a\u0648\u0644\u06cc\u062f \u0648 \u062f\u0631 \u0645\u0631\u0627\u062d\u0644 \u0627\u0648\u0644\u06cc\u0647 \u0627\u0633\u062a.  \u062a\u0641\u0627\u0648\u062a \u0647\u0627\u06cc \u0638\u0631\u06cc\u0641 \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u062f\u0631 \u0646\u0633\u062e\u0647 \u0647\u0627\u06cc \u0628\u0639\u062f\u06cc \u0648\u062c\u0648\u062f \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u062f.  
\u0646\u0648\u0634\u062a\u0646 \u0627\u0632 \u0646\u0633\u062e\u0647 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u062f <code>0.3.0<\/code>.<\/p>\n<\/p><\/div><\/div><\/div>\n<p>\u0628\u0631\u0627\u06cc \u0627\u06cc\u0646\u06a9\u0647 \u0628\u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 KerasNLP \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0628\u0627\u06cc\u062f \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f <code>pip<\/code>:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-meta\">$<\/span><span class=\"bash\"> pip install keras_nlp<\/span>\n<\/code><\/pre>\n<p>\u0648 \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0646\u0633\u062e\u0647 \u0631\u0627 \u0628\u0627:<\/p>\n<pre><code class=\"hljs\">keras_nlp.__version__\n\n<\/code><\/pre>\n<h2 id=\"implementingagptstylemodelwithkeras\"><span class=\"ez-toc-section\" id=\"%d9%be%db%8c%d8%a7%d8%af%d9%87_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d8%af%d9%84_gpt-style_%d8%a8%d8%a7_keras\"><\/span>\u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0645\u062f\u0644 GPT-Style \u0628\u0627 Keras<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0628\u0627 \u0648\u0627\u0631\u062f \u06a9\u0631\u062f\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u200c\u0647\u0627\u06cc\u06cc \u06a9\u0647 \u0627\u0632 \u0622\u0646\u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f &#8211; TensorFlow\u060c Keras\u060c KerasNLP \u0648 NumPy \u0634\u0631\u0648\u0639 \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> tensorflow <span class=\"hljs-keyword\">as<\/span> tf\n<span class=\"hljs-keyword\">from<\/span> tensorflow <span class=\"hljs-keyword\">import<\/span> keras\n<span class=\"hljs-keyword\">import<\/span> keras_nlp\n<span 
class=\"hljs-keyword\">import<\/span> numpy <span class=\"hljs-keyword\">as<\/span> np\n<\/code><\/pre>\n<h3 id=\"loadingdata\"><span class=\"ez-toc-section\" id=\"%d8%af%d8%b1_%d8%ad%d8%a7%d9%84_%d8%a8%d8%a7%d8%b1%da%af%db%8c%d8%b1%db%8c_%d8%af%d8%a7%d8%af%d9%87_%d9%87%d8%a7\"><\/span>\u062f\u0631 \u062d\u0627\u0644 \u0628\u0627\u0631\u06af\u06cc\u0631\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u062a\u0639\u062f\u0627\u062f\u06cc \u0627\u0632 \u0631\u0645\u0627\u0646\u200c\u0647\u0627\u06cc \u062f\u0627\u0633\u062a\u0627\u06cc\u0648\u0641\u0633\u06a9\u06cc \u0631\u0627 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0646\u06cc\u0645 &#8211; \u06cc\u06a9\u06cc \u0627\u0632 \u0622\u0646\u200c\u0647\u0627 \u0628\u0631\u0627\u06cc \u06cc\u06a9 \u0645\u062f\u0644 \u0628\u0633\u06cc\u0627\u0631 \u06a9\u0648\u062a\u0627\u0647 \u0627\u0633\u062a\u060c \u0628\u062f\u0648\u0646 \u0622\u0646 \u06a9\u0647 \u0627\u0632 \u0645\u0631\u0627\u062d\u0644 \u0627\u0648\u0644\u06cc\u0647 \u0628\u0647 \u0628\u0639\u062f \u06a9\u0645\u06cc \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f \u0645\u0646\u0627\u0633\u0628 \u0628\u0627\u0634\u062f.  
\u0645\u0627 \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u0627\u0632 \u0641\u0627\u06cc\u0644 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc \u062e\u0627\u0645 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f <a rel=\"nofollow noopener noreferrer\" target=\"_blank\" href=\"https:\/\/www.gutenberg.org\/\">\u067e\u0631\u0648\u0698\u0647 \u06af\u0648\u062a\u0646\u0628\u0631\u06af<\/a>\u060c \u0628\u0647 \u062f\u0644\u06cc\u0644 \u0633\u0627\u062f\u06af\u06cc \u06a9\u0627\u0631 \u0628\u0627 \u0686\u0646\u06cc\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc\u06cc:<\/p>\n<pre><code class=\"hljs\">crime_and_punishment_url = <span class=\"hljs-string\">'https:\/\/www.gutenberg.org\/files\/2554\/2554-0.txt'<\/span>\nbrothers_of_karamazov_url = <span class=\"hljs-string\">'https:\/\/www.gutenberg.org\/files\/28054\/28054-0.txt'<\/span>\nthe_idiot_url = <span class=\"hljs-string\">'https:\/\/www.gutenberg.org\/files\/2638\/2638-0.txt'<\/span>\nthe_possessed_url = <span class=\"hljs-string\">'https:\/\/www.gutenberg.org\/files\/8117\/8117-0.txt'<\/span>\n\npaths = (crime_and_punishment_url, brothers_of_karamazov_url, the_idiot_url, the_possessed_url)\nnames = (<span class=\"hljs-string\">'Crime and Punishment'<\/span>, <span class=\"hljs-string\">'Brothers of Karamazov'<\/span>, <span class=\"hljs-string\">'The Idiot'<\/span>, <span class=\"hljs-string\">'The Possessed'<\/span>)\ntexts = <span class=\"hljs-string\">''<\/span>\n<span class=\"hljs-keyword\">for<\/span> index, path <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">enumerate<\/span>(paths):\n    filepath = keras.utils.get_file(<span class=\"hljs-string\">f'<span class=\"hljs-subst\">{names[index]}<\/span>.txt'<\/span>, origin=path)\n    text = <span class=\"hljs-string\">''<\/span>\n    <span class=\"hljs-keyword\">with<\/span> <span class=\"hljs-built_in\">open<\/span>(filepath, encoding=<span class=\"hljs-string\">'utf-8'<\/span>) <span 
class=\"hljs-keyword\">as<\/span> f:\n        text = f.read()\n        \n        \n        \n        texts += text[<span class=\"hljs-number\">10000<\/span>:]\n<\/code><\/pre>\n<p>\u0645\u0627 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u0647\u0645\u0647 \u0641\u0627\u06cc\u0644 \u0647\u0627 \u0631\u0627 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645\u060c \u0622\u0646\u0647\u0627 \u0631\u0627 \u0645\u0631\u0648\u0631 \u06a9\u0631\u062f\u0647 \u0648 \u0622\u0646\u0647\u0627 \u0631\u0627 \u0628\u0647 \u0647\u0645 \u0645\u062a\u0635\u0644 \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645 \u0631\u0648\u06cc \u0628\u0627\u0644\u0627\u06cc \u062f\u06cc\u06af\u0631\u06cc.  \u0627\u06cc\u0646 \u0634\u0627\u0645\u0644 \u0628\u0631\u062e\u06cc \u0627\u0632 \u062a\u0646\u0648\u0639 \u062f\u0631 \u0632\u0628\u0627\u0646 \u0645\u0648\u0631\u062f \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0633\u062a\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0647\u0645\u0686\u0646\u0627\u0646 \u0622\u0646 \u0631\u0627 \u06a9\u0627\u0645\u0644\u0627\u064b \u0641\u0626\u0648\u062f\u0648\u0631 \u062d\u0641\u0638 \u0645\u06cc \u06a9\u0646\u062f!  
\u0628\u0631\u0627\u06cc \u0647\u0631 \u0641\u0627\u06cc\u0644\u060c 10 \u0647\u0632\u0627\u0631 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631 \u0627\u0648\u0644 \u0631\u0627 \u062d\u0630\u0641 \u06a9\u0631\u062f\u0647\u200c\u0627\u06cc\u0645\u060c \u06a9\u0647 \u062a\u0642\u0631\u06cc\u0628\u0627\u064b \u0637\u0648\u0644 \u0645\u062a\u0648\u0633\u0637 \u200b\u200b\u067e\u06cc\u0634\u06af\u0641\u062a\u0627\u0631 \u0648 \u0645\u0642\u062f\u0645\u0647 \u06af\u0648\u062a\u0646\u0628\u0631\u06af \u0627\u0633\u062a\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0628\u0631\u0627\u06cc \u0647\u0631 \u062a\u06a9\u0631\u0627\u0631\u060c \u0628\u062f\u0646\u0647\u200c\u0627\u06cc \u06a9\u0627\u0645\u0644\u0627\u064b \u0633\u0627\u0644\u0645 \u0627\u0632 \u06a9\u062a\u0627\u0628 \u0628\u0627\u0642\u06cc \u0645\u06cc\u200c\u0645\u0627\u0646\u062f.  \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 500 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631 \u062a\u0635\u0627\u062f\u0641\u06cc \u062f\u0631 \u0622\u0646 \u0628\u06cc\u0627\u0646\u062f\u0627\u0632\u06cc\u0645 <code>texts<\/code> \u0631\u0634\u062a\u0647 \u0627\u06a9\u0646\u0648\u0646:<\/p>\n<pre><code class=\"hljs\">\ntexts[<span class=\"hljs-number\">25000<\/span>:<span class=\"hljs-number\">25500<\/span>]\n<\/code><\/pre>\n<pre><code class=\"hljs\">'nd that was why\\nI addressed you at once. For in unfolding to you the story of my life, I\\ndo not wish to make myself a laughing-stock before these idle listeners,\\nwho indeed know all about it already, but I am looking for a man\\nof feeling and education. 
Know then that my wife was educated in a\\nhigh-class school for the daughters of noblemen, and on leaving she\\ndanced the shawl dance before the governor and other personages for\\nwhich she was presented with a gold medal and a certificate of merit.\\n'\n<\/code><\/pre>\n<p>\u0642\u0628\u0644 \u0627\u0632 \u0627\u0646\u062c\u0627\u0645 \u0647\u0631 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u06cc\u06af\u0631\u06cc\u060c \u0631\u0634\u062a\u0647 \u0631\u0627 \u0628\u0647 \u062c\u0645\u0644\u0627\u062a \u062c\u062f\u0627 \u0645\u06cc \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">text_list = texts.split(<span class=\"hljs-string\">'.'<\/span>)\n<span class=\"hljs-built_in\">len<\/span>(text_list) \n<\/code><\/pre>\n<p>\u0645\u0627 69 \u0647\u0632\u0627\u0631 \u062c\u0645\u0644\u0647 \u062f\u0627\u0631\u06cc\u0645.  \u0647\u0646\u06af\u0627\u0645\u06cc \u06a9\u0647 \u0634\u0645\u0627 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 <code>\\n<\/code> \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631\u0647\u0627 \u0628\u0627 \u0641\u0627\u0635\u0644\u0647 \u062e\u0627\u0644\u06cc \u0648 \u0634\u0645\u0627\u0631\u0634 \u06a9\u0644\u0645\u0627\u062a:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-built_in\">len<\/span>(texts.replace(<span class=\"hljs-string\">'\\n'<\/span>, <span class=\"hljs-string\">' '<\/span>).split(<span class=\"hljs-string\">' '<\/span>)) \n<\/code><\/pre>\n<div class=\"alert alert-note\">\n<div class=\"flex\">\n<div class=\"flex-shrink-0 mr-3\"><\/div>\n<div class=\"w-full\">\n<p><strong>\u062a\u0648\u062c\u0647 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f:<\/strong> \u0634\u0645\u0627 \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u062d\u062f\u0627\u0642\u0644 \u06cc\u06a9 \u0645\u06cc\u0644\u06cc\u0648\u0646 \u06a9\u0644\u0645\u0647 \u062f\u0631 \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062f\u0627\u0634\u062a\u0647 
\u0628\u0627\u0634\u06cc\u062f\u060c \u0648 \u062f\u0631 \u062d\u0627\u0644\u062a \u0627\u06cc\u062f\u0647 \u0622\u0644\u060c \u062e\u06cc\u0644\u06cc \u0628\u06cc\u0634\u062a\u0631 \u0627\u0632 \u0622\u0646.  \u0645\u0627 \u0628\u0627 \u0686\u0646\u062f \u0645\u06af\u0627\u0628\u0627\u06cc\u062a \u062f\u0627\u062f\u0647 (~ 5 \u0645\u06af\u0627\u0628\u0627\u06cc\u062a) \u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0645\u062f\u0644 \u0647\u0627\u06cc \u0632\u0628\u0627\u0646 \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u0622\u0645\u0648\u0632\u0634 \u062f\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u0646\u062f \u0631\u0648\u06cc \u062f\u0647 \u0647\u0627 \u06af\u06cc\u06af\u0627\u0628\u0627\u06cc\u062a \u0645\u062a\u0646  \u0627\u06cc\u0646\u060c \u0628\u0647 \u0637\u0648\u0631 \u0637\u0628\u06cc\u0639\u06cc\u060c \u062a\u0646\u0627\u0633\u0628 \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f \u0648\u0631\u0648\u062f\u06cc \u0645\u062a\u0646 \u0631\u0627 \u0628\u0633\u06cc\u0627\u0631 \u0622\u0633\u0627\u0646 \u0648 \u062a\u0639\u0645\u06cc\u0645 \u0622\u0646 \u0631\u0627 \u062f\u0634\u0648\u0627\u0631 \u0645\u06cc \u06a9\u0646\u062f (\u06af\u06cc\u062c \u0634\u062f\u0646 \u0632\u06cc\u0627\u062f \u0628\u062f\u0648\u0646 \u0628\u0631\u0627\u0632\u0634 \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f\u060c \u06cc\u0627 \u06af\u06cc\u062c\u06cc \u06a9\u0645 \u0628\u0627 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u0632\u06cc\u0627\u062f).  
\u0646\u062a\u0627\u06cc\u062c \u0631\u0627 \u0628\u0627 \u06cc\u06a9 \u062f\u0627\u0646\u0647 \u0646\u0645\u06a9 \u0628\u06af\u06cc\u0631\u06cc\u062f.<\/p>\n<\/p><\/div><\/div><\/div>\n<p>\u0628\u0627 \u0627\u06cc\u0646 \u0648\u062c\u0648\u062f\u060c \u0627\u062c\u0627\u0632\u0647 \u062f\u0647\u06cc\u062f \u0627\u06cc\u0646\u0647\u0627 \u0631\u0627 \u0628\u0647 \u06cc\u06a9 \u062a\u0642\u0633\u06cc\u0645 \u06a9\u0646\u06cc\u0645 <em>\u0622\u0645\u0648\u0632\u0634<\/em>\u060c <em>\u062a\u0633\u062a<\/em> \u0648 <em>\u0627\u0639\u062a\u0628\u0627\u0631 \u0633\u0646\u062c\u06cc<\/em> \u062a\u0646\u0638\u06cc\u0645.  \u0627\u0628\u062a\u062f\u0627 \u0631\u0634\u062a\u0647 \u0647\u0627\u06cc \u062e\u0627\u0644\u06cc \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u062c\u0645\u0644\u0627\u062a \u0631\u0627 \u0628\u0647 \u0647\u0645 \u0645\u06cc \u0632\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">\ntext_list = <span class=\"hljs-built_in\">list<\/span>(<span class=\"hljs-built_in\">filter<\/span>(<span class=\"hljs-literal\">None<\/span>, text_list))\n\n<span class=\"hljs-keyword\">import<\/span> random\nrandom.shuffle(text_list)\n<\/code><\/pre>\n<p>\u0633\u067e\u0633\u060c \u062a\u0642\u0633\u06cc\u0645 70\/15\/15 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">length = <span class=\"hljs-built_in\">len<\/span>(text_list)\ntext_train = text_list[:<span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.7<\/span>*length)]\ntext_test = text_list[<span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.7<\/span>*length):<span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.85<\/span>*length)]\ntext_valid = text_list[<span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.85<\/span>*length):]\n<\/code><\/pre>\n<p>\u0627\u06cc\u0646 \u06cc\u06a9 \u0631\u0648\u0634 \u0633\u0627\u062f\u0647 \u0648 \u062f\u0631 
\u0639\u06cc\u0646 \u062d\u0627\u0644 \u0645\u0648\u062b\u0631 \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u062a\u0642\u0633\u06cc\u0645 \u0628\u0646\u062f\u06cc \u0622\u0645\u0648\u0632\u0634-\u0622\u0632\u0645\u0648\u0646- \u0627\u0639\u062a\u0628\u0627\u0631 \u0633\u0646\u062c\u06cc \u0627\u0633\u062a.  \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u0622\u0646 \u0628\u06cc\u0646\u062f\u0627\u0632\u06cc\u0645 <code>text_train<\/code>:<\/p>\n<pre><code class=\"hljs\">[' It was a dull morning, but the snow had ceased',\n '\\n\\n\"Pierre, you who know so much of what goes on here, can you really have\\nknown nothing of this business and have heard nothing about it?\"\\n\\n\"What? What a set! So it\\'s not enough to be a child in your old age,\\nyou must be a spiteful child too! Varvara Petrovna, did you hear what he\\nsaid?\"\\n\\nThere was a general outcry; but then suddenly an incident took place\\nwhich no one could have anticipated', ...\n<\/code><\/pre>\n<p>\u0632\u0645\u0627\u0646 \u0627\u0633\u062a\u0627\u0646\u062f\u0627\u0631\u062f\u0633\u0627\u0632\u06cc \u0648 \u0628\u0631\u062f\u0627\u0631\u06cc \u0627\u0633\u062a!<\/p>\n<h3 id=\"textvectorization\"><span class=\"ez-toc-section\" id=\"%d8%a8%d8%b1%d8%af%d8%a7%d8%b1_%d8%b3%d8%a7%d8%b2%db%8c_%d9%85%d8%aa%d9%86\"><\/span>\u0628\u0631\u062f\u0627\u0631 \u0633\u0627\u0632\u06cc \u0645\u062a\u0646<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0634\u0628\u06a9\u0647 \u0647\u0627 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 \u062f\u0631\u06a9 \u0646\u0645\u06cc \u06a9\u0646\u0646\u062f &#8211; \u0622\u0646\u0647\u0627 \u0627\u0639\u062f\u0627\u062f \u0631\u0627 \u062f\u0631\u06a9 \u0645\u06cc \u06a9\u0646\u0646\u062f.  
\u0645\u0627 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0644\u0645\u0627\u062a \u0631\u0627 &#8220;\u062a\u0648\u06a9\u0646\u06cc\u0632\u0647&#8221; \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">...\nsequence = ('I', 'am', 'Wall-E')\nsequence = tokenize(sequence)\nprint(sequence) # (4, 26, 472)\n...\n<\/code><\/pre>\n<p>\u0647\u0645\u0686\u0646\u06cc\u0646\u060c \u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 \u062c\u0645\u0644\u0627\u062a \u0627\u0632 \u0646\u0638\u0631 \u0637\u0648\u0644 \u0645\u062a\u0641\u0627\u0648\u062a \u0647\u0633\u062a\u0646\u062f &#8211; padding \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u0628\u0647 \u0686\u067e \u06cc\u0627 \u0631\u0627\u0633\u062a \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u062a\u0627 \u0627\u0632 \u0634\u06a9\u0644 \u06cc\u06a9\u0633\u0627\u0646\u06cc \u062f\u0631 \u062c\u0645\u0644\u0627\u062a\u06cc \u06a9\u0647 \u0648\u0627\u0631\u062f \u0645\u06cc\u200c\u0634\u0648\u0646\u062f \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u062d\u0627\u0635\u0644 \u0634\u0648\u062f. \u0628\u06af\u0648\u06cc\u06cc\u062f \u0637\u0648\u0644\u0627\u0646\u06cc\u200c\u062a\u0631\u06cc\u0646 \u062c\u0645\u0644\u0647 \u0645\u0627 5 \u06a9\u0644\u0645\u0647 (\u062a\u0648\u06a9\u0646) \u0627\u0633\u062a.  
\u062f\u0631 \u0622\u0646 \u0635\u0648\u0631\u062a\u060c \u062c\u0645\u0644\u0647 Wall-E \u0628\u0627 \u062f\u0648 \u0635\u0641\u0631 \u067e\u0631 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0627\u0632 \u0647\u0645\u0627\u0646 \u0634\u06a9\u0644 \u0648\u0631\u0648\u062f\u06cc \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u0645\u06cc\u200c\u062f\u0647\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">sequence = pad_sequence(sequence)\nprint(sequence) # (4, 26, 472, 0, 0)\n<\/code><\/pre>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0633\u0646\u062a\u06cc\u060c \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 TensorFlow \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u0634\u062f <code>Tokenizer<\/code> \u0648 \u06a9\u0631\u0627\u0633 <code>pad_sequences()<\/code> \u0631\u0648\u0634 \u0647\u0627 &#8211; \u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u06cc\u06a9 \u0644\u0627\u06cc\u0647 \u0628\u0633\u06cc\u0627\u0631 \u0645\u0641\u06cc\u062f\u062a\u0631\u060c <code>TextVectorization<\/code>\u060c \u0645\u06cc \u062a\u0648\u0627\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f \u06a9\u0647 &#8220;\u062a\u0648\u06a9\u0646\u06cc\u0632\u0647 \u0645\u06cc \u06a9\u0646\u062f&#8221; <em>\u0648<\/em> \u0648\u0631\u0648\u062f\u06cc \u062e\u0648\u062f \u0631\u0627 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0628\u0647 \u0634\u0645\u0627 \u0627\u06cc\u0646 \u0627\u0645\u06a9\u0627\u0646 \u0631\u0627 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0648\u0627\u0698\u06af\u0627\u0646 \u0648 \u0627\u0646\u062f\u0627\u0632\u0647 \u0622\u0646 \u0631\u0627 \u0628\u062f\u0648\u0646 \u062f\u0627\u0646\u0633\u062a\u0646 \u0648\u0627\u0698\u06af\u0627\u0646 \u0627\u0632 \u0642\u0628\u0644 \u0627\u0633\u062a\u062e\u0631\u0627\u062c \u06a9\u0646\u06cc\u062f!<\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0648\u0641\u0642 \u062f\u0647\u06cc\u0645 
\u0648 \u0645\u0646\u0627\u0633\u0628 \u0628\u0627\u0634\u06cc\u0645 <code>TextVectorization<\/code> \u0644\u0627\u06cc\u0647:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> tensorflow.keras.layers <span class=\"hljs-keyword\">import<\/span> TextVectorization\n\n<span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">custom_standardization<\/span>(<span class=\"hljs-params\">input_string<\/span>):<\/span>\n    sentence = tf.strings.lower(input_string)\n    sentence = tf.strings.regex_replace(sentence, <span class=\"hljs-string\">\"\\n\"<\/span>, <span class=\"hljs-string\">\" \"<\/span>)\n    <span class=\"hljs-keyword\">return<\/span> sentence\n\nmaxlen = <span class=\"hljs-number\">50<\/span>\n\n\n\nvectorize_layer = TextVectorization(\n    standardize = custom_standardization,\n    output_mode=<span class=\"hljs-string\">\"int\"<\/span>,\n    output_sequence_length=maxlen + <span class=\"hljs-number\">1<\/span>,\n)\n\nvectorize_layer.adapt(text_list)\nvocab = vectorize_layer.get_vocabulary()\n<\/code><\/pre>\n<p>\u0631\u0627 <code>custom_standardization()<\/code> \u0631\u0648\u0634 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u062e\u06cc\u0644\u06cc \u0637\u0648\u0644\u0627\u0646\u06cc \u062a\u0631 \u0627\u0632 \u0627\u06cc\u0646 \u0628\u0627\u0634\u062f.  \u0645\u0627 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u062a\u0645\u0627\u0645 \u0648\u0631\u0648\u062f\u06cc \u0647\u0627 \u0631\u0627 \u067e\u0627\u06cc\u06cc\u0646 \u0622\u0648\u0631\u062f\u0647 \u0627\u06cc\u0645 \u0648 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u0634\u062f\u0647 \u0627\u06cc\u0645 <code>\\n<\/code> \u0628\u0627 <code>\" \"<\/code>.  
\u0627\u06cc\u0646\u062c\u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u06cc\u0634\u062a\u0631 \u067e\u06cc\u0634\u200c\u067e\u0631\u062f\u0627\u0632\u0634 \u062e\u0648\u062f \u0631\u0627 \u0628\u0631\u0627\u06cc \u0645\u062a\u0646 \u0642\u0631\u0627\u0631 \u062f\u0647\u06cc\u062f &#8211; \u0648 \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u06af\u0632\u06cc\u0646\u0647 \u0627\u062e\u062a\u06cc\u0627\u0631\u06cc \u0628\u0647 \u0644\u0627\u06cc\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc \u0627\u0631\u0627\u0626\u0647 \u06a9\u0646\u06cc\u062f. <code>standardize<\/code> \u0622\u0631\u06af\u0648\u0645\u0627\u0646.  \u06cc\u06a9 \u0628\u0627\u0631 \u0634\u0645\u0627 <code>adapt()<\/code> \u0644\u0627\u06cc\u0647 \u0628\u0647 \u0645\u062a\u0646 (\u0622\u0631\u0627\u06cc\u0647 NumPy \u06cc\u0627 \u0644\u06cc\u0633\u062a \u0645\u062a\u0648\u0646) &#8211; \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0648\u0627\u0698\u06af\u0627\u0646 \u0648 \u0647\u0645\u0686\u0646\u06cc\u0646 \u0627\u0646\u062f\u0627\u0632\u0647 \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0622\u0646\u062c\u0627 \u062f\u0631\u06cc\u0627\u0641\u062a \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">vocab_size = <span class=\"hljs-built_in\">len<\/span>(vocab)\nvocab_size \n<\/code><\/pre>\n<p>\u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u0628\u0631\u0627\u06cc \u00ab\u062a\u0648\u06a9\u0646 \u0632\u062f\u0627\u06cc\u06cc\u00bb \u06a9\u0644\u0645\u0627\u062a\u060c \u06cc\u06a9 \u0639\u0644\u0627\u0645\u062a \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>index_lookup<\/code> \u0641\u0631\u0647\u0646\u06af \u0644\u063a\u062a:<\/p>\n<pre><code class=\"hljs\">index_lookup = <span class=\"hljs-built_in\">dict<\/span>(<span class=\"hljs-built_in\">zip<\/span>(<span class=\"hljs-built_in\">range<\/span>(<span class=\"hljs-built_in\">len<\/span>(vocab)), vocab))    \nindex_lookup[<span 
class=\"hljs-number\">5<\/span>] \n<\/code><\/pre>\n<p>\u0647\u0645\u0647 \u062a\u0648\u06a9\u0646 \u0647\u0627 \u0631\u0627 \u0646\u0642\u0634\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc \u0645\u06cc \u06a9\u0646\u062f (<code>[1, 2, 3, 4, ...]<\/code>) \u0628\u0647 \u06a9\u0644\u0645\u0627\u062a \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u0648\u0627\u0698\u06af\u0627\u0646 (<code>['a', 'the', 'i', ...]<\/code>).  \u0628\u0627 \u0639\u0628\u0648\u0631 \u0627\u0632 \u06cc\u06a9 \u06a9\u0644\u06cc\u062f (\u0634\u0627\u062e\u0635 \u0646\u0634\u0627\u0646\u0647) \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u06a9\u0644\u0645\u0647 \u0631\u0627 \u067e\u0633 \u0628\u06af\u06cc\u0631\u06cc\u0645.  \u0627\u06a9\u0646\u0648\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f <code>vectorize_layer()<\/code> \u0631\u0648\u06cc  \u0647\u0631 \u0648\u0631\u0648\u062f\u06cc \u0648 \u0645\u0634\u0627\u0647\u062f\u0647 \u062c\u0645\u0644\u0627\u062a \u0628\u0631\u062f\u0627\u0631\u06cc:<\/p>\n<pre><code class=\"hljs\">vectorize_layer([<span class=\"hljs-string\">'hello world!'<\/span>])\n<\/code><\/pre>\n<p>\u06a9\u0647 \u0645\u0646\u062c\u0631 \u0628\u0647:<\/p>\n<pre><code class=\"hljs\">&lt;tf.Tensor: shape=(1, 51), dtype=int64, numpy=\narray([[   1, 7509,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n           0,    0,    0,    0,    0,    0,    0]], dtype=int64)&gt;\n<\/code><\/pre>\n<p><code>hello<\/code> \u062f\u0627\u0631\u0627\u06cc \u0634\u0627\u062e\u0635 \u0627\u0632 <code>1<\/code> \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 <code>world<\/code> \u062f\u0627\u0631\u0627\u06cc \u0634\u0627\u062e\u0635 \u0627\u0633\u062a 
<code>7509<\/code>!  \u0628\u0642\u06cc\u0647 \u0628\u0627\u0644\u0634\u062a\u06a9 \u0628\u0647 <code>maxlen<\/code> \u0645\u0627 \u0645\u062d\u0627\u0633\u0628\u0647 \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645<\/p>\n<p>\u0645\u0627 \u0627\u0628\u0632\u0627\u0631\u06cc \u0628\u0631\u0627\u06cc \u0628\u0631\u062f\u0627\u0631 \u06a9\u0631\u062f\u0646 \u0645\u062a\u0646 \u062f\u0627\u0631\u06cc\u0645 &#8211; \u0627\u06a9\u0646\u0648\u0646\u060c \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u0647\u0627\u06cc \u062f\u0627\u062f\u0647 \u0631\u0627 \u0627\u0632 \u0622\u0646 \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645 <code>text_train<\/code>\u060c <code>text_test<\/code> \u0648 <code>text_valid<\/code>\u060c \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0644\u0627\u06cc\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc \u0645\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u0631\u0633\u0627\u0646\u0647 \u062a\u0628\u062f\u06cc\u0644 \u0628\u06cc\u0646 \u06a9\u0644\u0645\u0627\u062a \u0648 \u0628\u0631\u062f\u0627\u0631\u0647\u0627\u06cc\u06cc \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u0646\u062f \u0628\u0647 GPT-Fyodor \u0648\u0627\u0631\u062f \u0634\u0648\u0646\u062f.<\/p>\n<h3 id=\"datasetcreation\"><span class=\"ez-toc-section\" id=\"%d8%a7%db%8c%d8%ac%d8%a7%d8%af_%d9%85%d8%ac%d9%85%d9%88%d8%b9%d9%87_%d8%af%d8%a7%d8%af%d9%87\"><\/span>\u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0645\u0627 \u062f\u0631 \u062d\u0627\u0644 \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 <code>tf.data.Dataset<\/code> \u0628\u0631\u0627\u06cc \u0647\u0631 \u06cc\u06a9 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u0647\u0627\u06cc \u0645\u0627 \u060c \u0627\u0633\u062a\u0641\u0627\u062f\u0647 <code>from_tensor_slices()<\/code> \u0648 \u0627\u0631\u0627\u0626\u0647 \u0641\u0647\u0631\u0633\u062a\u06cc 
\u0627\u0632\u060c \u062e\u0648\u0628\u060c \u0628\u0631\u0634 \u0647\u0627\u06cc \u062a\u0627\u0646\u0633\u0648\u0631 (\u062c\u0645\u0644\u0627\u062a):<\/p>\n<pre><code class=\"hljs\">batch_size = <span class=\"hljs-number\">64<\/span>\n\ntrain_dataset = tf.data.Dataset.from_tensor_slices(text_train)\ntrain_dataset = train_dataset.shuffle(buffer_size=<span class=\"hljs-number\">256<\/span>)\ntrain_dataset = train_dataset.batch(batch_size)\n\ntest_dataset = tf.data.Dataset.from_tensor_slices(text_test)\ntest_dataset = test_dataset.shuffle(buffer_size=<span class=\"hljs-number\">256<\/span>)\ntest_dataset = test_dataset.batch(batch_size)\n\nvalid_dataset = tf.data.Dataset.from_tensor_slices(text_valid)\nvalid_dataset = valid_dataset.shuffle(buffer_size=<span class=\"hljs-number\">256<\/span>)\nvalid_dataset = valid_dataset.batch(batch_size)\n<\/code><\/pre>\n<p>\u067e\u0633 \u0627\u0632 \u0627\u06cc\u062c\u0627\u062f \u0648 \u0645\u062e\u0644\u0648\u0637 \u06a9\u0631\u062f\u0646 (\u062f\u0648\u0628\u0627\u0631\u0647\u060c \u0628\u0631\u0627\u06cc \u0627\u0646\u062f\u0627\u0632\u0647 \u06af\u06cc\u0631\u06cc \u062e\u0648\u0628) &#8211; \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u06cc\u06a9 \u062a\u0627\u0628\u0639 \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 (\u0628\u0631\u062f\u0627\u0631\u0633\u0627\u0632\u06cc \u0648 \u062a\u0642\u0633\u06cc\u0645 \u062a\u0631\u062a\u06cc\u0628) \u0631\u0627 \u0627\u0639\u0645\u0627\u0644 \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">preprocess_text<\/span>(<span class=\"hljs-params\">text<\/span>):<\/span>\n    text = tf.expand_dims(text, -<span class=\"hljs-number\">1<\/span>)\n    tokenized_sentences = vectorize_layer(text)\n    x = tokenized_sentences[:, :-<span class=\"hljs-number\">1<\/span>]\n    y = tokenized_sentences[:, <span class=\"hljs-number\">1<\/span>:]\n    <span 
class=\"hljs-keyword\">return<\/span> x, y\n\n\ntrain_dataset = train_dataset.<span class=\"hljs-built_in\">map<\/span>(preprocess_text)\ntrain_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)\n\ntest_dataset = test_dataset.<span class=\"hljs-built_in\">map<\/span>(preprocess_text)\ntest_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)\n\nvalid_dataset = valid_dataset.<span class=\"hljs-built_in\">map<\/span>(preprocess_text)\nvalid_dataset = valid_dataset.prefetch(tf.data.AUTOTUNE)\n<\/code><\/pre>\n<p>\u0631\u0627 <code>preprocess_text()<\/code> \u062a\u0627\u0628\u0639 \u0628\u0647 \u0633\u0627\u062f\u06af\u06cc \u0628\u0627 \u0622\u062e\u0631\u06cc\u0646 \u0628\u0639\u062f \u06af\u0633\u062a\u0631\u0634 \u0645\u06cc \u06cc\u0627\u0628\u062f\u060c \u0645\u062a\u0646 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0645\u0627 &#8220;\u0628\u0631\u062f\u0627\u0631&#8221; \u0645\u06cc \u06a9\u0646\u062f <code>vectorize_layer<\/code> \u0648 \u0648\u0631\u0648\u062f\u06cc \u0647\u0627 \u0648 \u0627\u0647\u062f\u0627\u0641 \u0631\u0627 \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc \u06a9\u0646\u062f \u06a9\u0647 \u0628\u0627 \u06cc\u06a9 \u062a\u0648\u06a9\u0646 \u062c\u0628\u0631\u0627\u0646 \u0645\u06cc \u0634\u0648\u062f.  \u0645\u062f\u0644 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u062f \u06a9\u0631\u062f <code>(0..n)<\/code> \u0627\u0633\u062a\u0646\u0628\u0627\u0637 \u06a9\u0631\u062f\u0646 <code>n+1<\/code>\u060c \u0628\u0631\u0627\u06cc \u0647\u0631 \u06a9\u0644\u0645\u0647 \u06cc\u06a9 \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u0628\u0647 \u062f\u0633\u062a \u0645\u06cc \u062f\u0647\u062f \u0648 \u062a\u0645\u0627\u0645 \u06a9\u0644\u0645\u0627\u062a \u0642\u0628\u0644 \u0627\u0632 \u0622\u0646 \u0631\u0627 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u062f.  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u06cc\u06a9 \u0648\u0631\u0648\u062f\u06cc \u0648\u0627\u062d\u062f \u062f\u0631 \u0647\u0631 \u06cc\u06a9 \u0627\u0632 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u06cc\u0646\u062f\u0627\u0632\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">for<\/span> entry <span class=\"hljs-keyword\">in<\/span> train_dataset.take(<span class=\"hljs-number\">1<\/span>):\n    <span class=\"hljs-built_in\">print<\/span>(entry)\n<\/code><\/pre>\n<p>\u0628\u0627 \u0628\u0631\u0631\u0633\u06cc \u0648\u0631\u0648\u062f\u06cc\u200c\u0647\u0627 \u0648 \u0627\u0647\u062f\u0627\u0641 \u0628\u0631\u06af\u0634\u062a\u06cc\u060c \u062f\u0631 \u062f\u0633\u062a\u0647\u200c\u0647\u0627\u06cc 64 \u062a\u0627\u06cc\u06cc (\u0628\u0627 \u0637\u0648\u0644 \u0647\u0631 \u06a9\u062f\u0627\u0645 30 \u0639\u062f\u062f)\u060c \u0628\u0647 \u0648\u0636\u0648\u062d \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0628\u06cc\u0646\u06cc\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0622\u0646\u0647\u0627 \u0628\u0627 \u06cc\u06a9 \u062c\u0628\u0631\u0627\u0646 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f:<\/p>\n<pre><code class=\"hljs\">(&lt;tf.Tensor: shape=(64, 50), dtype=int64, numpy=\narray(((17018,   851,     2, ...,     0,     0,     0),\n       (  330,    74,     4, ...,     0,     0,     0),\n       (   68,   752, 30273, ...,     0,     0,     0),\n       ...,\n       (    7,    73,  2004, ...,     0,     0,     0),\n       (   44,    42,    67, ...,     0,     0,     0),\n       (  195,   252,   102, ...,     0,     0,     0)), dtype=int64)&gt;, &lt;tf.Tensor: shape=(64, 50), dtype=int64, numpy=\narray(((  851,     2,  8289, ...,     0,     0,     0),\n       (   74,     4,    34, ...,     0,     0,     0),\n       (  752, 30273,  7514, ...,     0,     0,     0),\n       ...,\n       (   73,  2004,    31, ...,     0,     0,     0),\n   
    (   42,    67,    76, ...,     0,     0,     0),\n       (  252,   102,  8596, ...,     0,     0,     0)), dtype=int64)&gt;)\n<\/code><\/pre>\n<p>\u0633\u0631\u0627\u0646\u062c\u0627\u0645 &#8211; \u0648\u0642\u062a \u0622\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0645\u062f\u0644 \u0631\u0627 \u0628\u0633\u0627\u0632\u06cc\u0645!<\/p>\n<h3 id=\"modeldefinition\"><span class=\"ez-toc-section\" id=\"%d8%aa%d8%b9%d8%b1%db%8c%d9%81_%d9%85%d8%af%d9%84\"><\/span>\u062a\u0639\u0631\u06cc\u0641 \u0645\u062f\u0644<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0645\u0627 \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0627\u0632 \u0644\u0627\u06cc\u0647 \u0647\u0627\u06cc Kerasnlp \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f.  \u0628\u0639\u062f \u0627\u0632 \u06cc\u06a9 <code>Input<\/code>\u060c \u0645\u0627 \u0648\u0631\u0648\u062f\u06cc \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 a \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>TokenAndPositionEmbedding<\/code> \u0644\u0627\u06cc\u0647\u060c \u0639\u0628\u0648\u0631 \u062f\u0631 \u0645\u0627 <code>vocab_size<\/code>\u060c <code>maxlen<\/code> \u0648 <code>embed_dim<\/code>.  \u0647\u0645\u06cc\u0646\u0637\u0648\u0631 <code>embed_dim<\/code> \u06a9\u0647 \u0627\u06cc\u0646 \u0644\u0627\u06cc\u0647 \u0648\u0627\u0631\u062f \u0634\u062f\u0647 \u0648 \u0648\u0627\u0631\u062f \u0645\u06cc \u0634\u0648\u062f <code>TransformerDecoder<\/code> \u062e\u0648\u0627\u0647\u062f \u0628\u0648\u062f <em>\u062f\u0631 \u0631\u0633\u06cc\u0648\u0631 \u0646\u06af\u0647\u062f\u0627\u0631\u06cc \u0645\u06cc \u0634\u0648\u062f<\/em>.  
\u062f\u0631 \u0632\u0645\u0627\u0646 \u0646\u0648\u0634\u062a\u0646\u060c \u0631\u0645\u0632\u06af\u0634\u0627 \u0628\u0647 \u0637\u0648\u0631 \u062e\u0648\u062f\u06a9\u0627\u0631 \u0627\u0628\u0639\u0627\u062f \u0648\u0631\u0648\u062f\u06cc \u0631\u0627 \u062d\u0641\u0638 \u0645\u06cc \u06a9\u0646\u062f\u060c \u0648 \u0628\u0647 \u0634\u0645\u0627 \u0627\u062c\u0627\u0632\u0647 \u0646\u0645\u06cc \u062f\u0647\u062f \u0622\u0646 \u0631\u0627 \u062f\u0631 \u062e\u0631\u0648\u062c\u06cc \u062f\u06cc\u06af\u0631\u06cc \u0642\u0631\u0627\u0631 \u062f\u0647\u06cc\u062f\u060c \u0627\u0645\u0627 \u0628\u0647 \u0634\u0645\u0627 \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc \u062f\u0647\u062f \u0627\u0628\u0639\u0627\u062f \u067e\u0646\u0647\u0627\u0646 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 <code>intermediate_dim<\/code> \u0622\u0631\u06af\u0648\u0645\u0627\u0646.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0646\u0645\u0627\u06cc\u0634 \u067e\u0646\u0647\u0627\u0646\u060c \u0627\u0628\u0639\u0627\u062f \u062c\u0627\u0633\u0627\u0632\u06cc \u0631\u0627 \u062f\u0631 \u062f\u0648 \u0636\u0631\u0628 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u0645\u060c \u0627\u0645\u0627 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646 \u0631\u0627 \u062b\u0627\u0628\u062a \u0646\u06af\u0647 \u062f\u0627\u0631\u06cc\u062f \u06cc\u0627 \u0627\u0632 \u0639\u062f\u062f\u06cc \u062c\u062f\u0627 \u0634\u062f\u0647 \u0627\u0632 \u06a9\u0645\u0631\u0646\u06af\u200c\u0647\u0627\u06cc \u062a\u0639\u0628\u06cc\u0647\u200c\u0634\u062f\u0647 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">embed_dim = <span class=\"hljs-number\">128<\/span>\nnum_heads = <span class=\"hljs-number\">4<\/span>\n\n<span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">create_model<\/span>():<\/span>\n    inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)\n    embedding_layer = 
keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\n    decoder = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim, \n                                                            num_heads=num_heads, \n                                                            dropout=<span class=\"hljs-number\">0.5<\/span>)(embedding_layer)\n    \n    outputs = keras.layers.Dense(vocab_size, activation=<span class=\"hljs-string\">'softmax'<\/span>)(decoder)\n    \n    model = keras.Model(inputs=inputs, outputs=outputs)\n    \n    model.<span class=\"hljs-built_in\">compile<\/span>(\n        optimizer=<span class=\"hljs-string\">\"adam\"<\/span>, \n        loss=<span class=\"hljs-string\">'sparse_categorical_crossentropy'<\/span>,\n        metrics=(keras_nlp.metrics.Perplexity(), <span class=\"hljs-string\">'accuracy'<\/span>)\n    )\n    <span class=\"hljs-keyword\">return<\/span> model\n\nmodel = create_model()\nmodel.summary()\n<\/code><\/pre>\n<p>\u062f\u0631 \u0628\u0627\u0644\u0627\u06cc \u0631\u0645\u0632\u06af\u0630\u0627\u0631 \u060c \u0645\u0627 <code>Dense<\/code> \u0644\u0627\u06cc\u0647 \u0628\u0631\u0627\u06cc \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0644\u0645\u0647 \u0628\u0639\u062f\u06cc \u062f\u0631 \u062f\u0646\u0628\u0627\u0644\u0647\u060c \u0628\u0627 a <code>softmax<\/code> \u0641\u0639\u0627\u0644 \u0633\u0627\u0632\u06cc (\u06a9\u0647 \u062a\u0648\u0632\u06cc\u0639 \u0627\u062d\u062a\u0645\u0627\u0644 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0647\u0631 \u062a\u0648\u06a9\u0646 \u0628\u0639\u062f\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc \u06a9\u0646\u062f).  
\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u062e\u0644\u0627\u0635\u0647 \u0645\u062f\u0644 \u0628\u06cc\u0646\u062f\u0627\u0632\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">Model: \"model_5\"\n_________________________________________________________________\n Layer (type)                Output Shape              Param #   \n=================================================================\n input_6 (InputLayer)        ((None, 30))              0         \n                                                                 \n token_and_position_embeddin  (None, 30, 128)          6365824   \n g_5 (TokenAndPositionEmbedd                                     \n ing)                                                            \n                                                                 \n transformer_decoder_5 (Tran  (None, 30, 128)          132480    \n sformerDecoder)                                                 \n                                                                 \n dense_5 (Dense)             (None, 30, 49703)         6411687   \n                                                                 \n=================================================================\nTotal params: 13,234,315\nTrainable params: 13,234,315\nNon-trainable params: 0\n_________________________________________________________________\n<\/code><\/pre>\n<p>GPT-2 \u0631\u0645\u0632\u06af\u0634\u0627\u0647\u0627\u06cc \u0632\u06cc\u0627\u062f\u06cc \u0631\u0627 \u0631\u0648\u06cc \u0647\u0645 \u0642\u0631\u0627\u0631 \u0645\u06cc \u062f\u0647\u062f &#8211; GPT-2 Small \u062f\u0627\u0631\u0627\u06cc 12 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc \u067e\u0634\u062a\u0647 \u0627\u06cc (117M \u067e\u0627\u0631\u0627\u0645\u062a\u0631) \u0627\u0633\u062a\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 GPT-2 Extra Large \u062f\u0627\u0631\u0627\u06cc 48 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc \u067e\u0634\u062a\u0647 \u0627\u06cc (1.5B 
\u067e\u0627\u0631\u0627\u0645\u062a\u0631) \u0627\u0633\u062a.  \u0645\u062f\u0644 \u062a\u06a9 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc \u0645\u0627 \u0628\u0627 \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0633\u0627\u062f\u0647 13M \u0628\u0627\u06cc\u062f \u0628\u0631\u0627\u06cc \u0627\u0647\u062f\u0627\u0641 \u0622\u0645\u0648\u0632\u0634\u06cc \u0628\u0647 \u0627\u0646\u062f\u0627\u0632\u0647 \u06a9\u0627\u0641\u06cc \u062e\u0648\u0628 \u06a9\u0627\u0631 \u06a9\u0646\u062f.  \u0628\u0627 LLM \u0647\u0627 &#8211; \u062b\u0627\u0628\u062a \u0634\u062f\u0647 \u0627\u0633\u062a \u06a9\u0647 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u0642\u06cc\u0627\u0633 \u06cc\u06a9 \u0627\u0633\u062a\u0631\u0627\u062a\u0698\u06cc \u0628\u0633\u06cc\u0627\u0631 \u062e\u0648\u0628 \u0627\u0633\u062a\u060c \u0648 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631\u0647\u0627 \u0627\u062c\u0627\u0632\u0647 \u0645\u0642\u06cc\u0627\u0633 \u0628\u0646\u062f\u06cc \u062e\u0648\u0628 \u0631\u0627 \u0645\u06cc \u062f\u0647\u0646\u062f \u0648 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0647\u0627\u06cc \u0628\u0633\u06cc\u0627\u0631 \u0628\u0632\u0631\u06af \u0631\u0627 \u0627\u0645\u06a9\u0627\u0646 \u067e\u0630\u06cc\u0631 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>GPT-3 \u062f\u0627\u0631\u0627\u06cc \u06cc\u06a9 <em>&#8220;\u0636\u0639\u06cc\u0641&#8221;<\/em> \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc 175B.  \u062a\u06cc\u0645 Google Brain \u06cc\u06a9 \u0645\u062f\u0644 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 1.6T \u0631\u0627 \u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u062a\u062d\u0642\u06cc\u0642\u0627\u062a \u067e\u0631\u0627\u06a9\u0646\u062f\u0647 \u0648 \u062f\u0631 \u0639\u06cc\u0646 \u062d\u0627\u0644 \u0645\u062d\u0627\u0633\u0628\u0627\u062a \u0622\u0645\u0648\u0632\u0634 \u062f\u0627\u062f. 
\u0631\u0648\u06cc \u0647\u0645\u0627\u0646 \u0633\u0637\u062d \u0645\u062f\u0644 \u0647\u0627\u06cc \u06a9\u0648\u0686\u06a9\u062a\u0631.<\/p>\n<p>\u062f\u0631 \u0648\u0627\u0642\u0639\u060c \u0627\u06af\u0631 \u062a\u0639\u062f\u0627\u062f \u0631\u0645\u0632\u06af\u0634\u0627\u0647\u0627 \u0631\u0627 \u0627\u0632 1 \u0628\u0647 4 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0647\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">create_model<\/span>():<\/span>\n    inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)\n    x = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\n    <span class=\"hljs-keyword\">for<\/span> i <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">range<\/span>(<span class=\"hljs-number\">4<\/span>):\n        x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*<span class=\"hljs-number\">2<\/span>, num_heads=num_heads,                                                             dropout=<span class=\"hljs-number\">0.5<\/span>)(x)\n    do = keras.layers.Dropout(<span class=\"hljs-number\">0.4<\/span>)(x)\n    outputs = keras.layers.Dense(vocab_size, activation=<span class=\"hljs-string\">'softmax'<\/span>)(do)\n    \n    model = keras.Model(inputs=inputs, outputs=outputs)\n<\/code><\/pre>\n<p>\u062a\u0639\u062f\u0627\u062f \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u0627 400K \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u06cc\u0627\u0628\u062f:<\/p>\n<pre><code class=\"hljs\">Total params: 13,631,755\nTrainable params: 13,631,755\nNon-trainable params: 0\n<\/code><\/pre>\n<blockquote>\n<p>\u0628\u06cc\u0634\u062a\u0631 \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0634\u0628\u06a9\u0647 \u0645\u0627 \u0627\u0632 <code>TokenAndPositionEmbedding<\/code> \u0648 <code>Dense<\/code> \u0644\u0627\u06cc\u0647 
\u0647\u0627\u06cc!<\/p>\n<\/blockquote>\n<p>\u0627\u0639\u0645\u0627\u0642 \u0645\u062e\u062a\u0644\u0641 \u0631\u0645\u0632\u06af\u0634\u0627 \u0631\u0627 \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u062f &#8211; \u0627\u0632 1 \u062a\u0627 \u062a\u0645\u0627\u0645 \u0631\u0627\u0647\u06cc \u06a9\u0647 \u062f\u0633\u062a\u06af\u0627\u0647 \u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u06a9\u0627\u0631 \u06a9\u0646\u062f \u0648 \u0646\u062a\u0627\u06cc\u062c \u0631\u0627 \u06cc\u0627\u062f\u062f\u0627\u0634\u062a \u06a9\u0646\u06cc\u062f.  \u062f\u0631 \u0647\u0631 \u0635\u0648\u0631\u062a &#8211; \u0645\u0627 \u062a\u0642\u0631\u06cc\u0628\u0627\u064b \u0622\u0645\u0627\u062f\u0647 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0647\u0633\u062a\u06cc\u0645!  \u0628\u06cc\u0627\u06cc\u06cc\u062f \u06cc\u06a9 \u067e\u0627\u0633\u062e \u062a\u0645\u0627\u0633 \u0633\u0641\u0627\u0631\u0634\u06cc \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u0646\u0645\u0648\u0646\u0647 \u0627\u06cc \u0627\u0632 \u0645\u062a\u0646 \u0631\u0627 \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc \u06a9\u0646\u062f \u0631\u0648\u06cc \u062f\u0631 \u0647\u0631 \u062f\u0648\u0631\u0647\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0628\u06cc\u0646\u06cc\u0645 \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0645\u062f\u0644 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0622\u0645\u0648\u0632\u0634 \u062c\u0645\u0644\u0627\u062a \u0631\u0627 \u0645\u06cc \u0622\u0645\u0648\u0632\u062f.<\/p>\n<h3 id=\"customcallback\"><span class=\"ez-toc-section\" id=\"%d9%be%d8%a7%d8%b3%d8%ae_%d8%a8%d9%87_%d8%aa%d9%85%d8%a7%d8%b3_%d8%b3%d9%81%d8%a7%d8%b1%d8%b4%db%8c\"><\/span>\u067e\u0627\u0633\u062e \u0628\u0647 \u062a\u0645\u0627\u0633 \u0633\u0641\u0627\u0631\u0634\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<pre><code class=\"hljs\"><span class=\"hljs-class\"><span 
class=\"hljs-keyword\">class<\/span> <span class=\"hljs-title\">TextSampler<\/span>(<span class=\"hljs-params\">keras.callbacks.Callback<\/span>):<\/span>\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__init__<\/span>(<span class=\"hljs-params\">self, start_prompt, max_tokens<\/span>):<\/span>\n        self.start_prompt = start_prompt\n        self.max_tokens = max_tokens\n        \n    \n    \n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">sample_token<\/span>(<span class=\"hljs-params\">self, logits<\/span>):<\/span>\n        logits, indices = tf.math.top_k(logits, k=<span class=\"hljs-number\">5<\/span>, <span class=\"hljs-built_in\">sorted<\/span>=<span class=\"hljs-literal\">True<\/span>)\n        indices = np.asarray(indices).astype(<span class=\"hljs-string\">\"int32\"<\/span>)\n        preds = keras.activations.softmax(tf.expand_dims(logits, <span class=\"hljs-number\">0<\/span>))[<span class=\"hljs-number\">0<\/span>]\n        preds = np.asarray(preds).astype(<span class=\"hljs-string\">\"float32\"<\/span>)\n        <span class=\"hljs-keyword\">return<\/span> np.random.choice(indices, p=preds)\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">on_epoch_end<\/span>(<span class=\"hljs-params\">self, epoch, logs=<span class=\"hljs-literal\">None<\/span><\/span>):<\/span>\n        decoded_sample = self.start_prompt\n        \n        <span class=\"hljs-keyword\">for<\/span> i <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">range<\/span>(self.max_tokens-<span class=\"hljs-number\">1<\/span>):\n            tokenized_prompt = vectorize_layer([decoded_sample])[:, :-<span class=\"hljs-number\">1<\/span>]\n            predictions = self.model.predict([tokenized_prompt], verbose=<span class=\"hljs-number\">0<\/span>)\n            \n            \n            \n            \n          
  sample_index = <span class=\"hljs-built_in\">len<\/span>(decoded_sample.strip().split())-<span class=\"hljs-number\">1<\/span>\n            \n            sampled_token = self.sample_token(predictions[<span class=\"hljs-number\">0<\/span>][sample_index])\n            sampled_token = index_lookup[sampled_token]\n            decoded_sample += <span class=\"hljs-string\">\" \"<\/span> + sampled_token\n            \n        <span class=\"hljs-built_in\">print<\/span>(<span class=\"hljs-string\">f\"\\nSample text:\\n<span class=\"hljs-subst\">{decoded_sample}<\/span>...\\n\"<\/span>)\n\n\nrandom_sentence = <span class=\"hljs-string\">' '<\/span>.join(random.choice(text_valid).replace(<span class=\"hljs-string\">'\\n'<\/span>, <span class=\"hljs-string\">' '<\/span>).split(<span class=\"hljs-string\">' '<\/span>)[:<span class=\"hljs-number\">4<\/span>])\nsampler = TextSampler(random_sentence, <span class=\"hljs-number\">30<\/span>)\nreducelr = keras.callbacks.ReduceLROnPlateau(patience=<span class=\"hljs-number\">10<\/span>, monitor=<span class=\"hljs-string\">'val_loss'<\/span>)\n<\/code><\/pre>\n<h3 id=\"trainingthemodel\"><span class=\"ez-toc-section\" id=\"%d8%a2%d9%85%d9%88%d8%b2%d8%b4_%d9%85%d8%af%d9%84\"><\/span>\u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u0627\u0644\u0627\u062e\u0631\u0647 \u0648\u0642\u062a \u062a\u0645\u0631\u06cc\u0646 \u0627\u0633\u062a!  
\u0628\u06cc\u0627\u06cc\u06cc\u062f <code>train_dataset<\/code> \u0648 <code>valid_dataset<\/code> \u062e\u0648\u062f \u0631\u0627 \u0647\u0645\u0631\u0627\u0647 \u0628\u0627 \u062a\u0645\u0627\u0633 \u0647\u0627\u06cc \u0628\u0631\u06af\u0634\u062a\u06cc \u0628\u0647 \u0645\u062f\u0644 \u0628\u062f\u0647\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\">model = create_model()\nhistory = model.fit(train_dataset, \n                    validation_data=valid_dataset,\n                    epochs=<span class=\"hljs-number\">10<\/span>, \n                    callbacks=[sampler, reducelr])\n<\/code><\/pre>\n<p>\u0646\u0645\u0648\u0646\u0647\u200c\u06af\u0631 \u062c\u0645\u0644\u0647\u200c\u06cc \u062a\u0627\u0633\u0641\u200c\u0622\u0648\u0631\u06cc \u0631\u0627 \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0631\u062f \u06a9\u0647 \u0628\u0627 \u0646\u0642\u0644 \u0642\u0648\u0644 \u067e\u0627\u06cc\u0627\u0646 \u0648 \u0634\u0631\u0648\u0639 \u0646\u0642\u0644\u200c\u0642\u0648\u0644 \u0634\u0631\u0648\u0639 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u0627\u0645\u0627 \u0647\u0645\u0686\u0646\u0627\u0646 \u0646\u062a\u0627\u06cc\u062c \u062c\u0627\u0644\u0628\u06cc \u0631\u0627 \u062f\u0631 \u062d\u06cc\u0646 \u0622\u0645\u0648\u0632\u0634 \u0627\u06cc\u062c\u0627\u062f \u0645\u06cc\u200c\u06a9\u0646\u062f:<\/p>\n<pre><code class=\"hljs\"># Epoch training\nEpoch 1\/10\n658\/658 [==============================] - ETA: 0s - loss: 2.7480 - perplexity: 15.6119 - accuracy: 0.6711\n# on_epoch_end() sample generation\nSample text:\n\"  \"What do you had not been i had been the same man was not be the same eyes to been a whole man and he did a whole man to the own...\n# Validation\n658\/658 [==============================] - 158s 236ms\/step - loss: 2.7480 - perplexity: 15.6119 - accuracy: 0.6711 - val_loss: 2.2130 - val_perplexity: 9.1434 - val_accuracy: 0.6864 - lr: 0.0010\n...\nSample text:\n\"  \"What do you know it is it all this very much as i should not have a great impression  
in the room to be  able of it in my heart...\n\n658\/658 [==============================] - 149s 227ms\/step - loss: 1.7753 - perplexity: 5.9019 - accuracy: 0.7183 - val_loss: 2.0039 - val_perplexity: 7.4178 - val_accuracy: 0.7057 - lr: 0.0010\n<\/code><\/pre>\n<p>\u0628\u0627 \u0627\u06cc\u0646 \u0634\u0631\u0648\u0639 \u0645\u06cc \u0634\u0648\u062f:<\/p>\n<blockquote>\n<p>&#8220;\u0686\u06cc \u0628\u0648\u062f\u06cc \u06a9\u0647 \u0646\u0628\u0648\u062f\u06cc \u0645\u0646 \u0647\u0645\u0648\u0646 \u0628\u0648\u062f\u0645&#8221;&#8230;<\/p>\n<\/blockquote>\n<p>\u06a9\u0647 \u0648\u0627\u0642\u0639\u0627\u064b \u0686\u0646\u062f\u0627\u0646 \u0645\u0646\u0637\u0642\u06cc \u0646\u06cc\u0633\u062a.  \u062f\u0631 \u067e\u0627\u06cc\u0627\u0646 \u062f\u0647 \u062f\u0648\u0631\u0647 \u06a9\u0648\u062a\u0627\u0647\u060c \u0686\u06cc\u0632\u06cc \u062f\u0631 \u0627\u0645\u062a\u062f\u0627\u062f \u062e\u0637\u0648\u0637 \u0632\u06cc\u0631 \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc \u06a9\u0646\u062f:<\/p>\n<blockquote>\n<p>&#8220;\u0645\u0646\u0638\u0648\u0631\u062a \u0686\u06cc\u0633\u062a \u06a9\u0647 \u0627\u0644\u0628\u062a\u0647 \u0645\u0639\u0645\u0648\u0644\u06cc \u062a\u0631\u06cc\u0646 \u0645\u0631\u062f \u06cc\u06a9 \u0645\u0631\u062f \u0627\u0633\u062a&#8221;&#8230;<\/p>\n<\/blockquote>\n<p>\u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u062c\u0645\u0644\u0647 \u062f\u0648\u0645 \u0647\u0646\u0648\u0632 \u062e\u06cc\u0644\u06cc \u0645\u0639\u0646\u0627 \u0646\u062f\u0627\u0631\u062f &#8211; \u0628\u0633\u06cc\u0627\u0631 \u0645\u0639\u0646\u0627\u062f\u0627\u0631 \u062a\u0631 \u0627\u0632 \u062c\u0645\u0644\u0647 \u0627\u0648\u0644 \u0627\u0633\u062a.  
\u0622\u0645\u0648\u0632\u0634 \u0637\u0648\u0644\u0627\u0646\u06cc \u062a\u0631 \u0631\u0648\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u06cc\u0634\u062a\u0631 (\u0628\u0627 \u0645\u0631\u0627\u062d\u0644 \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u067e\u06cc\u0686\u06cc\u062f\u0647 \u062a\u0631) \u0646\u062a\u0627\u06cc\u062c \u0628\u0647\u062a\u0631\u06cc \u0628\u0647 \u0647\u0645\u0631\u0627\u0647 \u062e\u0648\u0627\u0647\u062f \u062f\u0627\u0634\u062a.  \u0645\u0627 \u0641\u0642\u0637 \u0622\u0646 \u0631\u0627 \u0622\u0645\u0648\u0632\u0634 \u062f\u0627\u062f\u0647 \u0627\u06cc\u0645 \u0631\u0648\u06cc 10 \u062f\u0648\u0631\u0647 \u0628\u0627 \u0627\u0646\u0635\u0631\u0627\u0641 \u0632\u06cc\u0627\u062f \u0628\u0631\u0627\u06cc \u0645\u0628\u0627\u0631\u0632\u0647 \u0628\u0627 \u0627\u0646\u062f\u0627\u0632\u0647 \u062f\u0627\u062f\u0647 \u06a9\u0648\u0686\u06a9.  \u0627\u06af\u0631 \u0628\u0631\u0627\u06cc \u0645\u062f\u062a \u0637\u0648\u0644\u0627\u0646\u06cc \u062a\u0631\u06cc \u0628\u0647 \u0622\u0645\u0648\u0632\u0634 \u0631\u0647\u0627 \u0645\u06cc \u0634\u062f\u060c \u0645\u062a\u0646 \u0628\u0633\u06cc\u0627\u0631 \u0641\u0626\u0648\u062f\u0648\u0631 \u0645\u0627\u0646\u0646\u062f \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc \u06a9\u0631\u062f\u060c \u0632\u06cc\u0631\u0627 \u062a\u06a9\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u06cc \u0627\u0632 \u0622\u0646 \u0631\u0627 \u062d\u0641\u0638 \u0645\u06cc \u06a9\u0631\u062f.<\/p>\n<div class=\"alert alert-note\">\n<div class=\"flex\">\n<div class=\"flex-shrink-0 mr-3\"><\/div>\n<div class=\"w-full\">\n<p><strong>\u062a\u0648\u062c\u0647 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f:<\/strong> \u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 \u062e\u0631\u0648\u062c\u06cc \u0646\u0633\u0628\u062a\u0627\u064b \u067e\u0631\u0645\u062e\u0627\u0637\u0628 \u0627\u0633\u062a\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646 
\u0631\u0627 \u062a\u063a\u06cc\u06cc\u0631 \u062f\u0647\u06cc\u062f <code>verbose<\/code> \u0622\u0631\u06af\u0648\u0645\u0627\u0646 \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0628\u0631\u0627\u0632\u0634 \u0645\u062f\u0644 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0647\u0634 \u0645\u0642\u062f\u0627\u0631 \u0645\u062a\u0646 \u0631\u0648\u06cc \u0635\u0641\u062d\u0647 \u0646\u0645\u0627\u06cc\u0634.<\/p>\n<\/div><\/div><\/div>\n<h3 id=\"modelinference\"><span class=\"ez-toc-section\" id=\"%d8%a7%d8%b3%d8%aa%d9%86%d8%aa%d8%a7%d8%ac_%d9%85%d8%af%d9%84\"><\/span>\u0627\u0633\u062a\u0646\u062a\u0627\u062c \u0645\u062f\u0644<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0628\u0631\u0627\u06cc \u0627\u0646\u062c\u0627\u0645 \u0627\u0633\u062a\u0646\u062a\u0627\u062c\u060c \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u0645 \u0631\u0627\u0628\u0637 \u06a9\u0627\u0631\u0628\u0631\u06cc \u0631\u0627 \u062a\u06a9\u0631\u0627\u0631 \u06a9\u0646\u06cc\u0645 <code>TextSampler<\/code> &#8211; \u0631\u0648\u0634\u06cc \u06a9\u0647 \u06cc\u06a9 \u0628\u0630\u0631 \u0648 \u06cc\u06a9 <code>response_length<\/code> (<code>max_tokens<\/code>) \u0645\u06cc \u067e\u0630\u06cc\u0631\u062f.  
\u0645\u0627 \u0627\u0632 \u0647\u0645\u0627\u0646 \u0631\u0648\u0634\u200c\u0647\u0627\u06cc\u06cc \u06a9\u0647 \u062f\u0631 \u0646\u0645\u0648\u0646\u0647\u200c\u06af\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u0645 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">sample_token<\/span>(<span class=\"hljs-params\">logits<\/span>):<\/span>\n        logits, indices = tf.math.top_k(logits, k=<span class=\"hljs-number\">5<\/span>, <span class=\"hljs-built_in\">sorted<\/span>=<span class=\"hljs-literal\">True<\/span>)\n        indices = np.asarray(indices).astype(<span class=\"hljs-string\">\"int32\"<\/span>)\n        preds = keras.activations.softmax(tf.expand_dims(logits, <span class=\"hljs-number\">0<\/span>))[<span class=\"hljs-number\">0<\/span>]\n        preds = np.asarray(preds).astype(<span class=\"hljs-string\">\"float32\"<\/span>)\n        <span class=\"hljs-keyword\">return<\/span> np.random.choice(indices, p=preds)\n\n<span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">generate_text<\/span>(<span class=\"hljs-params\">prompt, response_length=<span class=\"hljs-number\">20<\/span><\/span>):<\/span>\n    decoded_sample = prompt\n    <span class=\"hljs-keyword\">for<\/span> i <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">range<\/span>(response_length-<span class=\"hljs-number\">1<\/span>):\n        tokenized_prompt = vectorize_layer([decoded_sample])[:, :-<span class=\"hljs-number\">1<\/span>]\n        predictions = model.predict([tokenized_prompt], verbose=<span class=\"hljs-number\">0<\/span>)\n        sample_index = <span class=\"hljs-built_in\">len<\/span>(decoded_sample.strip().split())-<span class=\"hljs-number\">1<\/span>\n\n        sampled_token = sample_token(predictions[<span 
class=\"hljs-number\">0<\/span>][sample_index])\n        sampled_token = index_lookup[sampled_token]\n        decoded_sample += <span class=\"hljs-string\">\" \"<\/span> + sampled_token\n    <span class=\"hljs-keyword\">return<\/span> decoded_sample\n<\/code><\/pre>\n<p>\u062d\u0627\u0644\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0631\u0648\u0634 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f \u0631\u0648\u06cc \u0646\u0645\u0648\u0646\u0647 \u0647\u0627\u06cc \u062c\u062f\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">generate_text(<span class=\"hljs-string\">'the truth ultimately is'<\/span>)\n\n\ngenerate_text(<span class=\"hljs-string\">'the truth ultimately is'<\/span>)\n\n<\/code><\/pre>\n<h2 id=\"improvingresults\"><span class=\"ez-toc-section\" id=\"%d8%a8%d9%87%d8%a8%d9%88%d8%af_%d9%86%d8%aa%d8%a7%db%8c%d8%ac%d8%9f\"><\/span>\u0628\u0647\u0628\u0648\u062f \u0646\u062a\u0627\u06cc\u062c\u061f<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646\u060c \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0646\u062a\u0627\u06cc\u062c \u0631\u0627 \u0628\u0647\u0628\u0648\u062f \u0628\u062e\u0634\u06cc\u062f\u061f  \u0686\u0646\u062f \u06a9\u0627\u0631 \u0628\u0633\u06cc\u0627\u0631 \u0639\u0645\u0644\u06cc \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f:<\/p>\n<ul>\n<li>\u062a\u0645\u06cc\u0632 \u06a9\u0631\u062f\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627 (\u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0631\u0627 \u0628\u0627 \u062f\u0642\u062a \u0628\u06cc\u0634\u062a\u0631\u06cc \u062a\u0645\u06cc\u0632 \u06a9\u0646\u06cc\u062f\u060c \u0645\u0627 \u0641\u0642\u0637 \u06cc\u06a9 \u0639\u062f\u062f \u062a\u0642\u0631\u06cc\u0628\u06cc \u0631\u0627 \u0627\u0632 \u0627\u0628\u062a\u062f\u0627 
\u06a9\u0648\u062a\u0627\u0647 \u06a9\u0631\u062f\u06cc\u0645 \u0648 \u06a9\u0627\u0631\u0627\u06a9\u062a\u0631\u0647\u0627\u06cc \u062e\u0637 \u062c\u062f\u06cc\u062f \u0631\u0627 \u062d\u0630\u0641 \u06a9\u0631\u062f\u06cc\u0645)<\/li>\n<li>\u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u06cc\u0634\u062a\u0631\u06cc \u062f\u0631\u06cc\u0627\u0641\u062a \u06a9\u0646\u06cc\u062f (\u0645\u0627 \u0641\u0642\u0637 \u0628\u0627 \u0686\u0646\u062f \u0645\u06af\u0627\u0628\u0627\u06cc\u062a \u062f\u0627\u062f\u0647 \u0645\u062a\u0646\u06cc \u06a9\u0627\u0631 \u06a9\u0631\u062f\u06cc\u0645)<\/li>\n<li>\u0645\u062f\u0644 \u0631\u0627 \u062f\u0631 \u06a9\u0646\u0627\u0631 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0645\u0642\u06cc\u0627\u0633 \u06a9\u0646\u06cc\u062f (\u0627\u0646\u0628\u0627\u0634\u062a\u0647 \u06a9\u0631\u062f\u0646 \u0631\u0645\u0632\u06af\u0634\u0627\u0647\u0627 \u06a9\u0627\u0631 \u0633\u062e\u062a\u06cc \u0646\u06cc\u0633\u062a!)<\/li>\n<\/ul>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u062e\u0637 \u0644\u0648\u0644\u0647 \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062d\u062f\u0627\u0642\u0644\u06cc \u0627\u0633\u062a \u0648 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0622\u0646 \u0631\u0627 \u0628\u0647\u0628\u0648\u062f \u0628\u062e\u0634\u06cc\u062f &#8211; \u062e\u0637 \u0644\u0648\u0644\u0647 \u0645\u0634\u062e\u0635 \u0634\u062f\u0647 \u062f\u0631 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627 \u06cc\u06a9 \u0645\u062f\u0644 \u0645\u0646\u0627\u0633\u0628 \u0628\u0647 \u0633\u0628\u06a9 GPT \u0627\u06cc\u062c\u0627\u062f \u06a9\u0631\u062f\u060c \u0628\u0627 \u062a\u0646\u0647\u0627 5 \u062e\u0637 \u06a9\u062f \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632 \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u06cc\u06a9 
\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0641\u0642\u0637 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc \u0633\u0641\u0627\u0631\u0634\u06cc\u060c \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 Keras!<\/p>\n<p>\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631\u0647\u0627 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u062a\u0648\u0627\u0644\u06cc \u0639\u0645\u0648\u0645\u06cc \u0645\u062d\u0628\u0648\u0628 \u0648 \u0628\u0647 \u0637\u0648\u0631 \u06af\u0633\u062a\u0631\u062f\u0647 \u0642\u0627\u0628\u0644 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0647\u0633\u062a\u0646\u062f (\u0648 \u0628\u0633\u06cc\u0627\u0631\u06cc \u0627\u0632 \u0686\u06cc\u0632\u0647\u0627 \u0631\u0627 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0628\u0647 \u0635\u0648\u0631\u062a \u062a\u0648\u0627\u0644\u06cc \u0628\u06cc\u0627\u0646 \u06a9\u0631\u062f).  \u062a\u0627 \u06a9\u0646\u0648\u0646\u060c \u0645\u0627\u0646\u0639 \u0627\u0635\u0644\u06cc \u0648\u0631\u0648\u062f\u060c \u06cc\u06a9 \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u062f\u0633\u062a \u0648 \u067e\u0627 \u06af\u06cc\u0631 \u0628\u0648\u062f\u060c \u0627\u0645\u0627 \u0628\u0627 KerasNLP &#8211; \u0645\u062a\u062e\u0635\u0635\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0645\u06cc \u062a\u0648\u0627\u0646\u0646\u062f \u0627\u0632 \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0647\u0627 \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u0633\u0631\u06cc\u0639 \u0648 \u0622\u0633\u0627\u0646 \u0645\u062f\u0644 \u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u0646\u062f.<\/p>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        
if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=[];t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)[0];\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 \u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-06 05:24:03<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;14745&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 5 \u062e\u0637\u06cc \u0628\u0647 \u0633\u0628\u06a9 GPT \u062f\u0631 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0627 TensorFlow\\\/Keras&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div 
class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n    
        <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 12<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0631\u0632\u060c \u062d\u062a\u06cc \u0628\u0627 \u0648\u062c\u0648\u062f \u0627\u06cc\u0646\u06a9\u0647 \u062f\u0631 \u0633\u0627\u0644 2017 \u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u060c \u062a\u0646\u0647\u0627 \u062f\u0631 \u0686\u0646\u062f \u0633\u0627\u0644 \u0627\u062e\u06cc\u0631 \u0634\u0631\u0648\u0639 \u0628\u0647 \u062c\u0630\u0628 \u0642\u0627\u0628\u0644 \u062a\u0648\u062c\u0647\u06cc \u06a9\u0631\u062f\u0647 \u0627\u0633\u062a. \u0628\u0627 \u06af\u0633\u062a\u0631\u0634 \u0641\u0646\u0627\u0648\u0631\u06cc \u0627\u0632 \u0637\u0631\u06cc\u0642 \u067e\u0644\u062a\u0641\u0631\u0645 \u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f HuggingFace\u060c NLP \u0648 \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u0632\u0628\u0627\u0646 \u0628\u0632\u0631\u06af (LLM) \u062f\u0631 \u062f\u0633\u062a\u0631\u0633 \u062a\u0631 \u0627\u0632 \u0647\u0645\u06cc\u0634\u0647 \u0634\u062f\u0647 \u0627\u0646\u062f. 
\u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644 &#8211; \u062d\u062a\u06cc \u0628\u0627 \u0647\u0645\u0647 \u062a\u0628\u0644\u06cc\u063a\u0627\u062a \u0627\u0637\u0631\u0627\u0641 \u0622\u0646\u0647\u0627 \u0648 [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":9398,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620],"tags":[],"class_list":["post-14745","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/14745","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=14745"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/14745\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/9398"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=14745"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=14745"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=14745"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}