{"id":16275,"date":"2024-01-23T01:40:30","date_gmt":"2024-01-22T22:10:30","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/"},"modified":"2024-01-23T01:40:30","modified_gmt":"2024-01-22T22:10:30","slug":"%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/","title":{"rendered":"\u0645\u0642\u062f\u0645\u0647 \u0627\u06cc \u0628\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0628\u0627 \u067e\u0627\u06cc\u062a\u0648\u0646"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%85%d8%b9%d8%b1%d9%81%db%8c\" >\u0645\u0639\u0631\u0641\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c_%da%86%db%8c%d8%b3%d8%aa%d8%9f\" >\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0686\u06cc\u0633\u062a\u061f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%aa%d8%a7%d8%b1%db%8c%d8%ae%da%86%d9%87_%d9%85%d8%ae%d8%aa%d8%b5%d8%b1_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c\" >\u062a\u0627\u0631\u06cc\u062e\u0686\u0647 \u0645\u062e\u062a\u0635\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%88%d8%a7%da%98%d9%87_%d8%b4%d9%86%d8%a7%d8%b3%db%8c\" >\u0648\u0627\u0698\u0647 \u0634\u0646\u0627\u0633\u06cc<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b9%d8%a7%d9%85%d9%84\" >\u0639\u0627\u0645\u0644<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a 
class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%85%d8%ad%db%8c%d8%b7\" >\u0645\u062d\u06cc\u0637<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%aa%d8%a7%d8%a8%d8%b9_%d9%be%d8%a7%d8%af%d8%a7%d8%b4\" >\u062a\u0627\u0628\u0639 \u067e\u0627\u062f\u0627\u0634<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%aa%d8%a7%d8%a8%d8%b9_%d8%a7%d8%b1%d8%b2%d8%b4\" >\u062a\u0627\u0628\u0639 \u0627\u0631\u0632\u0634<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%ae%d8%b7_%d9%85%d8%b4%db%8c\" >\u062e\u0637 \u0645\u0634\u06cc<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af%d9%87%d8%a7%db%8c_%d8%a7%d8%b5%d9%84%db%8c\" >\u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0627\u0635\u0644\u06cc<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af_%d9%85%d8%a8%d8%aa%d9%86%db%8c_%d8%a8%d8%b1_%d8%b3%db%8c%d8%a7%d8%b3%d8%aa\" >\u0631\u0648\u06cc\u06a9\u0631\u062f \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0633\u06cc\u0627\u0633\u062a<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af_%d8%a7%d8%b1%d8%b2%d8%b4_%d9%85%d8%ad%d9%88%d8%b1\" >\u0631\u0648\u06cc\u06a9\u0631\u062f \u0627\u0631\u0632\u0634 \u0645\u062d\u0648\u0631<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-13\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%a7%da%a9%d8%aa%d8%b4%d8%a7%d9%81_%d8%af%d8%b1_%d9%85%d9%82%d8%a7%d8%a8%d9%84_%d8%a8%d9%87%d8%b1%d9%87_%d8%a8%d8%b1%d8%af%d8%a7%d8%b1%db%8c\" >\u0627\u06a9\u062a\u0634\u0627\u0641 
\u062f\u0631 \u0645\u0642\u0627\u0628\u0644 \u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-14\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%db%8c%da%a9_%d8%b1%d8%a7%d9%87%d8%b2%d9%86_%da%86%d9%86%d8%af_%d9%85%d8%b3%d9%84%d8%ad\" >\u06cc\u06a9 \u0631\u0627\u0647\u0632\u0646 \u0686\u0646\u062f \u0645\u0633\u0644\u062d<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-15\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b1%d9%88%d8%b4%e2%80%8c%d9%87%d8%a7%db%8c_%d8%b9%d9%85%d9%84_%d8%a7%d8%b1%d8%b2%d8%b4\" >\u0631\u0648\u0634\u200c\u0647\u0627\u06cc \u0639\u0645\u0644 \u0627\u0631\u0632\u0634<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-16\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b2%db%8c%d8%a7%d8%af_%d8%ad%d8%b1%db%8c%d8%b5_%d9%86%d8%a8%d8%a7%d8%b4\" >\u0632\u06cc\u0627\u062f \u062d\u0631\u06cc\u0635 \u0646\u0628\u0627\u0634<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-17\" 
href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d8%b1%d8%a7%d9%87_%d8%ad%d9%84_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\" >\u0631\u0627\u0647 \u062d\u0644 \u067e\u0627\u06cc\u062a\u0648\u0646<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-18\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-19\" href=\"https:\/\/rasanegaar.com\/blog\/%d9%85%d9%82%d8%af%d9%85%d9%87-%d8%a7%db%8c-%d8%a8%d8%b1-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c-%d8%a8%d8%a7-%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\/#%d9%85%d9%86%d8%a7%d8%a8%d8%b9\" >\u0645\u0646\u0627\u0628\u0639<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 7<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<h2 id=\"introduction\"><span class=\"ez-toc-section\" id=\"%d9%85%d8%b9%d8%b1%d9%81%db%8c\"><\/span>\u0645\u0639\u0631\u0641\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0642\u0637\u0639\u0627 \u06cc\u06a9\u06cc \u0627\u0632 \u0641\u0639\u0627\u0644 \u062a\u0631\u06cc\u0646 \u0648 
\u0645\u062d\u0631\u06a9 \u062a\u0631\u06cc\u0646 \u0632\u0645\u06cc\u0646\u0647 \u0647\u0627\u06cc \u062a\u062d\u0642\u06cc\u0642 \u062f\u0631 \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc \u0627\u0633\u062a.<\/p>\n<p>\u0639\u0644\u0627\u0642\u0647 \u0628\u0647 \u0627\u06cc\u0646 \u0632\u0645\u06cc\u0646\u0647 \u062f\u0631 \u0637\u06cc \u0686\u0646\u062f \u0633\u0627\u0644 \u06af\u0630\u0634\u062a\u0647\u060c \u0628\u0647 \u062f\u0646\u0628\u0627\u0644 \u067e\u06cc\u0634\u0631\u0641\u062a\u200c\u0647\u0627\u06cc \u0628\u0632\u0631\u06af (\u0648 \u062a\u0628\u0644\u06cc\u063a\u0627\u062a\u06cc \u0628\u0633\u06cc\u0627\u0631) \u0645\u0627\u0646\u0646\u062f <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/www.deepmind.com\/blog\/alphago-zero-starting-from-scratch\">DeepMind&#8217;s AlphaGo<\/a> \u0634\u06a9\u0633\u062a \u062f\u0627\u062f\u0646 \u06a9\u0644\u0645\u0647 \u0642\u0647\u0631\u0645\u0627\u0646 \u0645\u062f\u0644 \u0647\u0627\u06cc GO \u0648 OpenAI AI <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/venturebeat.com\/2019\/04\/22\/openais-dota-2-bot-defeated-99-4-of-players-in-public-matches\/\" class=\"broken_link\">\u0634\u06a9\u0633\u062a \u062f\u0627\u062f\u0646 \u0628\u0627\u0632\u06cc\u06a9\u0646\u0627\u0646 \u062d\u0631\u0641\u0647 \u0627\u06cc DOTA<\/a>.<\/p>\n<p>\u0628\u0647 \u0644\u0637\u0641 \u0647\u0645\u0647 \u0627\u06cc\u0646 \u067e\u06cc\u0634\u0631\u0641\u062a\u200c\u0647\u0627\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u062f\u0631 \u062d\u0627\u0644 \u062d\u0627\u0636\u0631 \u062f\u0631 \u0632\u0645\u06cc\u0646\u0647\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u060c \u0627\u0632 \u0645\u0631\u0627\u0642\u0628\u062a\u200c\u0647\u0627\u06cc \u0628\u0647\u062f\u0627\u0634\u062a\u06cc \u062a\u0627 \u0645\u0627\u0644\u06cc\u060c \u0627\u0632 \u0634\u06cc\u0645\u06cc \u062a\u0627 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0645\u0646\u0627\u0628\u0639\u060c 
\u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0628\u0647 \u0645\u0639\u0631\u0641\u06cc \u0645\u0641\u0627\u0647\u06cc\u0645 \u0648 \u0627\u0635\u0637\u0644\u0627\u062d\u0627\u062a \u0627\u0633\u0627\u0633\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0645\u06cc \u067e\u0631\u062f\u0627\u0632\u06cc\u0645 \u0648 \u0622\u0646\u0647\u0627 \u0631\u0627 \u062f\u0631 \u06cc\u06a9 \u0645\u062b\u0627\u0644 \u06a9\u0627\u0631\u0628\u0631\u062f\u06cc \u0628\u0647 \u06a9\u0627\u0631 \u0645\u06cc \u0628\u0631\u06cc\u0645.<\/p>\n<h2 id=\"whatisreinforcementlearning\"><span class=\"ez-toc-section\" id=\"%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c_%da%86%db%8c%d8%b3%d8%aa%d8%9f\"><\/span>\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0686\u06cc\u0633\u062a\u061f<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p><a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Reinforcement_learning\">\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc<\/a> (RL) \u0634\u0627\u062e\u0647 \u0627\u06cc \u0627\u0632 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0627 \u0628\u0627\u0632\u06cc\u06af\u0631\u0627\u0646 \u06cc\u0627 <strong>\u0639\u0648\u0627\u0645\u0644<\/strong>\u060c \u0627\u0646\u062c\u0627\u0645 \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0646\u0648\u0639\u06cc \u0627\u0633\u062a <strong>\u0645\u062d\u06cc\u0637<\/strong> \u0628\u0647 \u0645\u0646\u0638\u0648\u0631 \u0628\u0647 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0631\u0633\u0627\u0646\u062f\u0646 \u0646\u0648\u0639\u06cc \u0627\u0632 <strong>\u062c\u0627\u06cc\u0632\u0647<\/strong> \u06a9\u0647 \u062f\u0631 \u0637\u0648\u0644 \u0645\u0633\u06cc\u0631 \u062c\u0645\u0639 
\u0622\u0648\u0631\u06cc \u0645\u06cc \u06a9\u0646\u0646\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u0628\u0647 \u0639\u0645\u062f \u06cc\u06a9 \u062a\u0639\u0631\u06cc\u0641 \u0628\u0633\u06cc\u0627\u0631 \u0633\u0633\u062a \u0627\u0633\u062a\u060c \u0628\u0647 \u0647\u0645\u06cc\u0646 \u062f\u0644\u06cc\u0644 \u0627\u0633\u062a \u06a9\u0647 \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0631\u0627\u06cc \u0637\u06cc\u0641 \u0628\u0633\u06cc\u0627\u0631 \u06af\u0633\u062a\u0631\u062f\u0647 \u0627\u06cc \u0627\u0632 \u0645\u0633\u0627\u0626\u0644 \u062f\u0646\u06cc\u0627\u06cc \u0648\u0627\u0642\u0639\u06cc \u0628\u0647 \u06a9\u0627\u0631 \u0628\u0631\u062f.<\/p>\n<p>\u062a\u0635\u0648\u0631 \u06a9\u0646\u06cc\u062f \u0634\u062e\u0635\u06cc \u062f\u0631 \u062d\u0627\u0644 \u0627\u0646\u062c\u0627\u0645 \u06cc\u06a9 \u0628\u0627\u0632\u06cc \u0648\u06cc\u062f\u06cc\u0648\u06cc\u06cc \u0627\u0633\u062a.  \u0628\u0627\u0632\u06cc\u06a9\u0646 \u0639\u0627\u0645\u0644 \u0627\u0633\u062a \u0648 \u0628\u0627\u0632\u06cc \u0645\u062d\u06cc\u0637 \u0627\u0633\u062a.  
\u067e\u0627\u062f\u0627\u0634 \u0647\u0627\u06cc\u06cc \u06a9\u0647 \u0628\u0627\u0632\u06cc\u06a9\u0646 \u062f\u0631\u06cc\u0627\u0641\u062a \u0645\u06cc \u06a9\u0646\u062f (\u06cc\u0639\u0646\u06cc \u0634\u06a9\u0633\u062a \u062f\u0627\u062f\u0646 \u06cc\u06a9 \u062f\u0634\u0645\u0646\u060c \u062a\u06a9\u0645\u06cc\u0644 \u06cc\u06a9 \u0633\u0637\u062d)\u060c \u06cc\u0627 <em>\u0646\u0645\u06cc \u06a9\u0646\u062f<\/em> (\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644 \u0648\u0627\u0631\u062f \u06cc\u06a9 \u062a\u0644\u0647\u060c \u0634\u06a9\u0633\u062a \u062f\u0631 \u06cc\u06a9 \u0645\u0628\u0627\u0631\u0632\u0647) \u0628\u0647 \u0627\u0648 \u0645\u06cc \u0622\u0645\u0648\u0632\u062f \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0628\u0627\u0632\u06cc\u06a9\u0646 \u0628\u0647\u062a\u0631\u06cc \u0628\u0627\u0634\u062f.<\/p>\n<p>\u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u0627\u062d\u062a\u0645\u0627\u0644\u0627 \u0645\u062a\u0648\u062c\u0647 \u0634\u062f\u0647 \u0627\u06cc\u062f\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0648\u0627\u0642\u0639\u0627\u064b \u062f\u0631 \u0645\u0642\u0648\u0644\u0647 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u062d\u062a \u0646\u0638\u0627\u0631\u062a\/\u0628\u062f\u0648\u0646 \u0646\u0638\u0627\u0631\u062a\/\u0646\u06cc\u0645\u0647 \u0646\u0638\u0627\u0631\u062a \u0642\u0631\u0627\u0631 \u0646\u0645\u06cc \u06af\u06cc\u0631\u062f.<\/p>\n<p>\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u062f\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0646\u0638\u0627\u0631\u062a \u0634\u062f\u0647\u060c \u0647\u0631 \u062a\u0635\u0645\u06cc\u0645\u06cc \u06a9\u0647 \u062a\u0648\u0633\u0637 \u0645\u062f\u0644 \u06af\u0631\u0641\u062a\u0647 \u0645\u06cc \u0634\u0648\u062f \u0645\u0633\u062a\u0642\u0644 \u0627\u0633\u062a \u0648 \u0628\u0631 \u0622\u0646\u0686\u0647 \u062f\u0631 
\u0622\u06cc\u0646\u062f\u0647 \u0645\u06cc \u0628\u06cc\u0646\u06cc\u0645 \u062a\u0623\u062b\u06cc\u0631 \u0646\u0645\u06cc \u06af\u0630\u0627\u0631\u062f.<\/p>\n<p>\u062f\u0631 \u0639\u0648\u0636\u060c \u062f\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc\u060c \u0645\u0627 \u0628\u0647 \u06cc\u06a9 \u0627\u0633\u062a\u0631\u0627\u062a\u0698\u06cc \u0628\u0644\u0646\u062f\u0645\u062f\u062a \u0628\u0631\u0627\u06cc \u0639\u0627\u0645\u0644 \u062e\u0648\u062f \u0639\u0644\u0627\u0642\u0647 \u0645\u0646\u062f\u06cc\u0645\u060c \u06a9\u0647 \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0634\u0627\u0645\u0644 \u062a\u0635\u0645\u06cc\u0645\u0627\u062a \u063a\u06cc\u0631\u0628\u0647\u06cc\u0646\u0647 \u062f\u0631 \u0645\u0631\u0627\u062d\u0644 \u0645\u06cc\u0627\u0646\u06cc \u0648 \u06cc\u06a9 \u0645\u0628\u0627\u062f\u0644\u0647 \u0628\u06cc\u0646 <em>\u0627\u06a9\u062a\u0634\u0627\u0641<\/em> (\u0627\u0632 \u0645\u0633\u06cc\u0631\u0647\u0627\u06cc \u0646\u0627\u0634\u0646\u0627\u062e\u062a\u0647)\u060c \u0648 <em>\u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc<\/em> \u0627\u0632 \u0622\u0646\u0686\u0647 \u0642\u0628\u0644\u0627\u064b \u062f\u0631 \u0645\u0648\u0631\u062f \u0645\u062d\u06cc\u0637 \u0632\u06cc\u0633\u062a \u0645\u06cc \u062f\u0627\u0646\u06cc\u0645.<\/p>\n<h2 id=\"briefhistoryofreinforcementlearning\"><span class=\"ez-toc-section\" id=\"%d8%aa%d8%a7%d8%b1%db%8c%d8%ae%da%86%d9%87_%d9%85%d8%ae%d8%aa%d8%b5%d8%b1_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%aa%d9%82%d9%88%db%8c%d8%aa%db%8c\"><\/span>\u062a\u0627\u0631\u06cc\u062e\u0686\u0647 \u0645\u062e\u062a\u0635\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u0631\u0627\u06cc \u0686\u0646\u062f\u06cc\u0646 \u062f\u0647\u0647 (\u0627\u0632 \u062f\u0647\u0647 1950!)\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc 
\u062f\u0648 \u0631\u0634\u062a\u0647 \u062a\u062d\u0642\u06cc\u0642\u0627\u062a\u06cc \u062c\u062f\u0627\u06af\u0627\u0646\u0647 \u0631\u0627 \u062f\u0646\u0628\u0627\u0644 \u06a9\u0631\u062f\u060c \u06cc\u06a9\u06cc \u0628\u0627 \u062a\u0645\u0631\u06a9\u0632 \u0628\u0631 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc <em>\u0622\u0632\u0645\u0648\u0646 \u0648 \u062e\u0637\u0627<\/em>\u060c \u0648 \u062f\u06cc\u06af\u0631\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 <em>\u06a9\u0646\u062a\u0631\u0644 \u0628\u0647\u06cc\u0646\u0647<\/em>.<\/p>\n<p>\u0647\u062f\u0641 \u0631\u0648\u0634 \u0647\u0627\u06cc \u06a9\u0646\u062a\u0631\u0644 \u0628\u0647\u06cc\u0646\u0647 \u0637\u0631\u0627\u062d\u06cc \u06cc\u06a9 \u06a9\u0646\u062a\u0631\u0644 \u06a9\u0646\u0646\u062f\u0647 \u0628\u0631\u0627\u06cc \u0628\u0647 \u062d\u062f\u0627\u0642\u0644 \u0631\u0633\u0627\u0646\u062f\u0646 \u0627\u0646\u062f\u0627\u0632\u0647 \u06af\u06cc\u0631\u06cc \u0631\u0641\u062a\u0627\u0631 \u06cc\u06a9 \u0633\u06cc\u0633\u062a\u0645 \u062f\u06cc\u0646\u0627\u0645\u06cc\u06a9\u06cc \u062f\u0631 \u0637\u0648\u0644 \u0632\u0645\u0627\u0646 \u0627\u0633\u062a.  
\u0628\u0631\u0627\u06cc \u0631\u0633\u06cc\u062f\u0646 \u0628\u0647 \u0627\u06cc\u0646 \u0647\u062f\u0641\u060c \u0622\u0646\u0647\u0627 \u0639\u0645\u062f\u062a\u0627\u064b \u0627\u0632 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645 \u0647\u0627\u06cc \u0628\u0631\u0646\u0627\u0645\u0647 \u0646\u0648\u06cc\u0633\u06cc \u067e\u0648\u06cc\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0631\u062f\u0646\u062f \u06a9\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f \u06a9\u0647 \u067e\u0627\u06cc\u0647 \u0647\u0627\u06cc \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0645\u062f\u0631\u0646 \u0647\u0633\u062a\u0646\u062f.<\/p>\n<p>\u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0622\u0632\u0645\u0648\u0646 \u0648 \u062e\u0637\u0627\u060c \u062f\u0631 \u0639\u0648\u0636\u060c \u0631\u06cc\u0634\u0647 \u0647\u0627\u06cc \u0639\u0645\u06cc\u0642\u06cc \u062f\u0631 \u0631\u0648\u0627\u0646\u0634\u0646\u0627\u0633\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062d\u06cc\u0648\u0627\u0646\u0627\u062a \u0648 \u0639\u0644\u0648\u0645 \u0627\u0639\u0635\u0627\u0628 \u062f\u0627\u0631\u0646\u062f\u060c \u0648 \u0627\u06cc\u0646\u062c\u0627\u0633\u062a \u06a9\u0647 \u0627\u0635\u0637\u0644\u0627\u062d <em>\u062a\u0642\u0648\u06cc\u062a<\/em> \u0646\u0627\u0634\u06cc \u0627\u0632: \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0628\u0647 \u062f\u0646\u0628\u0627\u0644 (\u062a\u0642\u0648\u06cc\u062a \u0634\u062f\u0647) \u0628\u0627 \u0646\u062a\u0627\u06cc\u062c \u062e\u0648\u0628 \u06cc\u0627 \u0628\u062f \u062a\u0645\u0627\u06cc\u0644 \u0628\u0647 \u0627\u0646\u062a\u062e\u0627\u0628 \u0645\u062c\u062f\u062f \u0628\u0631 \u0627\u06cc\u0646 \u0627\u0633\u0627\u0633 \u062f\u0627\u0631\u0646\u062f.<\/p>\n<p>\u0628\u0631\u062e\u0627\u0633\u062a\u0647 \u0627\u0632 \u0645\u0637\u0627\u0644\u0639\u0647 \u0645\u06cc\u0627\u0646 \u0631\u0634\u062a\u0647 \u0627\u06cc 
\u0627\u06cc\u0646 \u062f\u0648 \u0631\u0634\u062a\u0647\u060c \u0631\u0634\u062a\u0647 \u0627\u06cc \u0628\u0647 \u0646\u0627\u0645 <a rel=\"nofollow noopener\" target=\"_blank\" href=\"http:\/\/www.scholarpedia.org\/article\/TD-learning\" class=\"broken_link\">\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0641\u0627\u0648\u062a \u0632\u0645\u0627\u0646\u06cc (TD)<\/a> \u067e\u062f\u06cc\u062f \u0622\u0645\u062f.<\/p>\n<p>\u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0645\u062f\u0631\u0646 \u0628\u0647 RL \u0639\u0645\u062f\u062a\u0627 \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 TD-Learning \u0627\u0633\u062a\u060c \u06a9\u0647 \u0628\u0627 \u0633\u06cc\u06af\u0646\u0627\u0644\u200c\u0647\u0627\u06cc \u067e\u0627\u062f\u0627\u0634 \u0648 \u062a\u0627\u0628\u0639 \u0627\u0631\u0632\u0634 \u0633\u0631\u0648\u06a9\u0627\u0631 \u062f\u0627\u0631\u062f (\u062f\u0631 \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641\u200c\u0647\u0627\u06cc \u0628\u0639\u062f\u06cc \u062c\u0632\u0626\u06cc\u0627\u062a \u0628\u06cc\u0634\u062a\u0631\u06cc \u0631\u0627 \u0645\u0634\u0627\u0647\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f).<\/p>\n<h2 id=\"terminology\"><span class=\"ez-toc-section\" id=\"%d9%88%d8%a7%da%98%d9%87_%d8%b4%d9%86%d8%a7%d8%b3%db%8c\"><\/span>\u0648\u0627\u0698\u0647 \u0634\u0646\u0627\u0633\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0627\u06a9\u0646\u0648\u0646 \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u0645\u0641\u0627\u0647\u06cc\u0645 \u0648 \u0627\u0635\u0637\u0644\u0627\u062d\u0627\u062a \u0627\u0635\u0644\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u0627\u0634\u062a.<\/p>\n<h3 id=\"agent\"><span class=\"ez-toc-section\" id=\"%d8%b9%d8%a7%d9%85%d9%84\"><\/span>\u0639\u0627\u0645\u0644<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0633\u06cc\u0633\u062a\u0645\u06cc 
\u06a9\u0647 \u062f\u0631 \u06cc\u06a9 \u0645\u062d\u06cc\u0637 \u062a\u0639\u0628\u06cc\u0647 \u0634\u062f\u0647 \u0648 \u0627\u0642\u062f\u0627\u0645\u0627\u062a\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u063a\u06cc\u06cc\u0631 \u0648\u0636\u0639\u06cc\u062a \u0645\u062d\u06cc\u0637 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u062f.  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0647 \u0631\u0648\u0628\u0627\u062a \u0647\u0627\u06cc \u0645\u062a\u062d\u0631\u06a9\u060c \u0639\u0648\u0627\u0645\u0644 \u0646\u0631\u0645 \u0627\u0641\u0632\u0627\u0631\u06cc \u06cc\u0627 \u06a9\u0646\u062a\u0631\u0644 \u06a9\u0646\u0646\u062f\u0647 \u0647\u0627\u06cc \u0635\u0646\u0639\u062a\u06cc \u0627\u0634\u0627\u0631\u0647 \u06a9\u0631\u062f.<\/p>\n<h3 id=\"environment\"><span class=\"ez-toc-section\" id=\"%d9%85%d8%ad%db%8c%d8%b7\"><\/span>\u0645\u062d\u06cc\u0637<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0633\u06cc\u0633\u062a\u0645 \u0628\u06cc\u0631\u0648\u0646\u06cc \u06a9\u0647 \u0639\u0627\u0645\u0644 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u00ab\u062f\u0631\u06a9\u00bb \u0648 \u0639\u0645\u0644 \u06a9\u0646\u062f \u0631\u0648\u06cc.<\/p>\n<p>\u0645\u062d\u06cc\u0637 \u0647\u0627 \u062f\u0631 RL \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0641\u0631\u0622\u06cc\u0646\u062f\u0647\u0627\u06cc \u062a\u0635\u0645\u06cc\u0645 \u06af\u06cc\u0631\u06cc \u0645\u0627\u0631\u06a9\u0648\u0641 (MDP) \u062a\u0639\u0631\u06cc\u0641 \u0645\u06cc \u0634\u0648\u0646\u062f.  
MDP \u06cc\u06a9 \u062a\u0627\u067e\u0644 \u0627\u0633\u062a:<\/p>\n<p>$$<br \/>(S, A, P, R, \\gamma)<br \/>$$<\/p>\n<p>\u062c\u0627\u06cc\u06cc \u06a9\u0647:<\/p>\n<ul>\n<li><strong>S<\/strong> \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0645\u062d\u062f\u0648\u062f \u0627\u0632 \u062d\u0627\u0644\u0627\u062a \u0627\u0633\u062a<\/li>\n<li><strong>A<\/strong> \u0645\u062c\u0645\u0648\u0639\u0647 \u0645\u062d\u062f\u0648\u062f\u06cc \u0627\u0632 \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0627\u0633\u062a<\/li>\n<li><strong>P<\/strong> \u06cc\u06a9 \u0645\u0627\u062a\u0631\u06cc\u0633 \u0627\u062d\u062a\u0645\u0627\u0644 \u0627\u0646\u062a\u0642\u0627\u0644 \u062d\u0627\u0644\u062a \u0627\u0633\u062a<\/li>\n<\/ul>\n<p>$$ P_{ss'}^{a} = \\mathbb{P}(S_{t+1} = s'| S_t = s, A_t = a) $$\n<\/p>\n<p>$$ R_s^a = \\mathbb{E}(R_{t+1}|S_t=s, A_t = a) $$\n<\/p>\n<ul>\n<li><strong>\u03b3<\/strong> \u06cc\u06a9 \u0639\u0627\u0645\u0644 \u062a\u062e\u0641\u06cc\u0641 \u0627\u0633\u062a\u060c <strong>\u03b3 \u2208 (0,1)<\/strong><\/li>\n<\/ul>\n<p><img decoding=\"async\" class=\"img-responsive\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/introduction-to-reinforcement-learning-with-python-1.png\" alt=\"\u0641\u0631\u0622\u06cc\u0646\u062f \u062a\u0635\u0645\u06cc\u0645 \u06af\u06cc\u0631\u06cc \u0645\u0627\u0631\u06a9\u0648\u0641\" title=\"\"><\/p>\n<p>\u0628\u0633\u06cc\u0627\u0631\u06cc \u0627\u0632 \u0633\u0646\u0627\u0631\u06cc\u0648\u0647\u0627\u06cc \u062f\u0646\u06cc\u0627\u06cc \u0648\u0627\u0642\u0639\u06cc \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0647 \u0635\u0648\u0631\u062a <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Markov_decision_process\">\u0641\u0631\u0622\u06cc\u0646\u062f\u0647\u0627\u06cc \u062a\u0635\u0645\u06cc\u0645 \u06af\u06cc\u0631\u06cc \u0645\u0627\u0631\u06a9\u0648\u0641<\/a> \u0646\u0645\u0627\u06cc\u0634 \u062f\u0627\u062f\u060c \u0627\u0632 
\u06cc\u06a9 \u0635\u0641\u062d\u0647 \u0634\u0637\u0631\u0646\u062c \u0633\u0627\u062f\u0647 \u062a\u0627 \u06cc\u06a9 \u0628\u0627\u0632\u06cc \u0648\u06cc\u062f\u06cc\u0648\u06cc\u06cc \u0628\u0633\u06cc\u0627\u0631 \u067e\u06cc\u0686\u06cc\u062f\u0647 \u062a\u0631.<\/p>\n<p>\u062f\u0631 \u06cc\u06a9 \u0645\u062d\u06cc\u0637 \u0634\u0637\u0631\u0646\u062c\u060c \u062d\u0627\u0644\u062a \u0647\u0627 \u0647\u0645\u0647 \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u0647\u0627\u06cc \u0645\u0645\u06a9\u0646 \u0635\u0641\u062d\u0647 \u0647\u0633\u062a\u0646\u062f (\u062a\u0639\u062f\u0627\u062f \u0632\u06cc\u0627\u062f\u06cc \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f).  \u0627\u0639\u0645\u0627\u0644 \u0628\u0647 \u062d\u0631\u06a9\u062a \u062f\u0627\u062f\u0646 \u0645\u0647\u0631\u0647 \u0647\u0627\u060c \u062a\u0633\u0644\u06cc\u0645 \u0634\u062f\u0646 \u0648 \u063a\u06cc\u0631\u0647 \u0627\u0634\u0627\u0631\u0647 \u062f\u0627\u0631\u062f.<\/p>\n<p>\u067e\u0627\u062f\u0627\u0634 \u0647\u0627 \u0628\u0631 \u0627\u0633\u0627\u0633 \u0622\u0646 \u0627\u0633\u062a \u0631\u0648\u06cc \u0686\u0647 \u0628\u0631\u0646\u062f\u0647 \u0628\u0627\u0634\u06cc\u0645 \u06cc\u0627 \u0686\u0647 \u0628\u0628\u0627\u0632\u06cc\u0645\u060c \u0628\u0647 \u0637\u0648\u0631\u06cc \u06a9\u0647 \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0628\u0631\u0646\u062f\u0647 \u0628\u0627\u0632\u062f\u0647\u06cc \u0628\u0627\u0644\u0627\u062a\u0631\u06cc \u0646\u0633\u0628\u062a \u0628\u0647 \u0628\u0627\u062e\u062a \u062f\u0627\u0631\u0646\u062f.<\/p>\n<p>\u0627\u062d\u062a\u0645\u0627\u0644\u0627\u062a \u0627\u0646\u062a\u0642\u0627\u0644 \u062d\u0627\u0644\u062a \u0642\u0648\u0627\u0639\u062f \u0628\u0627\u0632\u06cc \u0631\u0627 \u0627\u062c\u0631\u0627 \u0645\u06cc \u06a9\u0646\u0646\u062f.  
\u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u06cc\u06a9 \u0639\u0645\u0644 \u063a\u06cc\u0631\u0642\u0627\u0646\u0648\u0646\u06cc (\u062d\u0631\u06a9\u062a \u06cc\u06a9 \u0631\u062e \u0628\u0647 \u0635\u0648\u0631\u062a \u0645\u0648\u0631\u0628) \u0627\u062d\u062a\u0645\u0627\u0644 \u0635\u0641\u0631 \u062e\u0648\u0627\u0647\u062f \u062f\u0627\u0634\u062a.<\/p>\n<h3 id=\"rewardfunction\"><span class=\"ez-toc-section\" id=\"%d8%aa%d8%a7%d8%a8%d8%b9_%d9%be%d8%a7%d8%af%d8%a7%d8%b4\"><\/span>\u062a\u0627\u0628\u0639 \u067e\u0627\u062f\u0627\u0634<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062a\u0627\u0628\u0639 \u067e\u0627\u062f\u0627\u0634 \u0648\u0636\u0639\u06cc\u062a \u0647\u0627 \u0631\u0627 \u0628\u0647 \u067e\u0627\u062f\u0627\u0634 \u0647\u0627\u06cc \u0622\u0646\u0647\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f.  \u0627\u06cc\u0646 \u0627\u0637\u0644\u0627\u0639\u0627\u062a\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0639\u0648\u0627\u0645\u0644 \u0628\u0631\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0648\u0634 \u062d\u0631\u06a9\u062a \u062f\u0631 \u0645\u062d\u06cc\u0637 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u0646\u062f.<\/p>\n<p>\u062a\u062d\u0642\u06cc\u0642\u0627\u062a \u0632\u06cc\u0627\u062f\u06cc \u0628\u0631\u0627\u06cc \u0637\u0631\u0627\u062d\u06cc \u06cc\u06a9 \u062a\u0627\u0628\u0639 \u067e\u0627\u062f\u0627\u0634 \u062e\u0648\u0628 \u0648 \u063a\u0644\u0628\u0647 \u0628\u0631 \u0645\u0634\u06a9\u0644 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u0634\u0648\u062f <em>\u067e\u0627\u062f\u0627\u0634 \u0647\u0627\u06cc \u0627\u0646\u062f\u06a9<\/em>\u060c \u0632\u0645\u0627\u0646\u06cc \u06a9\u0647 \u0645\u0627\u0647\u06cc\u062a \u0627\u063a\u0644\u0628 \u067e\u0631\u0627\u06a9\u0646\u062f\u0647 \u067e\u0627\u062f\u0627\u0634 \u0647\u0627 \u062f\u0631 \u0645\u062d\u06cc\u0637 \u0628\u0647 \u0639\u0627\u0645\u0644 
\u0627\u062c\u0627\u0632\u0647 \u0646\u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0628\u0647 \u062f\u0631\u0633\u062a\u06cc \u0627\u0632 \u0622\u0646 \u0628\u06cc\u0627\u0645\u0648\u0632\u062f.<\/p>\n<p>\u0628\u0631\u06af\u0634\u062a <strong>G<sub>t<\/sub><\/strong>  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062c\u0645\u0648\u0639 \u062a\u062e\u0641\u06cc\u0641 \u067e\u0627\u062f\u0627\u0634 \u0627\u0632 \u0645\u0631\u062d\u0644\u0647 \u0632\u0645\u0627\u0646\u06cc <strong>t<\/strong> \u062a\u0639\u0631\u06cc\u0641 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>$$ G_t=\\sum_{k=0}^{\\infty} \\gamma^k R_{t+k+1} $$\n<\/p>\n<p><strong>\u03b3<\/strong> \u0639\u0627\u0645\u0644 \u062a\u062e\u0641\u06cc\u0641 \u0646\u0627\u0645\u06cc\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f \u0648 \u0628\u0627 \u06a9\u0627\u0647\u0634 \u0645\u0642\u062f\u0627\u0631 \u067e\u0627\u062f\u0627\u0634 \u0647\u0627 \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0628\u0647 \u0633\u0645\u062a \u0622\u06cc\u0646\u062f\u0647 \u062d\u0631\u06a9\u062a \u0645\u06cc \u06a9\u0646\u06cc\u0645\u060c \u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062a\u062e\u0641\u06cc\u0641 \u067e\u0627\u062f\u0627\u0634 \u0647\u0627 \u0628\u0647 \u0645\u0627 \u0627\u0645\u06a9\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u0639\u062f\u0645 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u062f\u0631 \u0645\u0648\u0631\u062f \u0622\u06cc\u0646\u062f\u0647 \u0631\u0627 \u0646\u0634\u0627\u0646 \u062f\u0647\u06cc\u0645\u060c \u0627\u0645\u0627 \u0647\u0645\u0686\u0646\u06cc\u0646 \u0628\u0647 \u0645\u0627 \u06a9\u0645\u06a9 \u0645\u06cc \u06a9\u0646\u062f \u0631\u0641\u062a\u0627\u0631 \u0627\u0646\u0633\u0627\u0646\u06cc \u0631\u0627 \u0628\u0647\u062a\u0631 \u0645\u062f\u0644 \u06a9\u0646\u06cc\u0645\u060c \u0632\u06cc\u0631\u0627 \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a \u06a9\u0647 
\u0627\u0646\u0633\u0627\u0646\/\u062d\u06cc\u0648\u0627\u0646 \u0628\u0631\u0627\u06cc \u067e\u0627\u062f\u0627\u0634 \u0647\u0627\u06cc \u0641\u0648\u0631\u06cc \u062a\u0631\u062c\u06cc\u062d \u0645\u06cc \u062f\u0647\u0646\u062f.<\/p>\n<h3 id=\"valuefunction\"><span class=\"ez-toc-section\" id=\"%d8%aa%d8%a7%d8%a8%d8%b9_%d8%a7%d8%b1%d8%b2%d8%b4\"><\/span>\u062a\u0627\u0628\u0639 \u0627\u0631\u0632\u0634<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062a\u0627\u0628\u0639 \u0645\u0642\u062f\u0627\u0631 \u0627\u062d\u062a\u0645\u0627\u0644\u0627\u064b \u0645\u0647\u0645 \u062a\u0631\u06cc\u0646 \u0627\u0637\u0644\u0627\u0639\u0627\u062a\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u062f\u0631 \u0645\u0648\u0631\u062f \u06cc\u06a9 \u0645\u0634\u06a9\u0644 RL \u0646\u06af\u0647 \u062f\u0627\u0631\u06cc\u0645.<\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0631\u0633\u0645\u06cc\u060c \u062a\u0627\u0628\u0639 \u0645\u0642\u062f\u0627\u0631 \u0627\u0633\u062a <em>\u0628\u0627\u0632\u062f\u0647 \u0645\u0648\u0631\u062f \u0627\u0646\u062a\u0638\u0627\u0631 \u0628\u0627 \u0634\u0631\u0648\u0639 \u0627\u0632 \u062d\u0627\u0644\u062a s<\/em>.  \u062f\u0631 \u0639\u0645\u0644\u060c \u062a\u0627\u0628\u0639 \u0627\u0631\u0632\u0634 \u0628\u0647 \u0645\u0627 \u0645\u06cc \u06af\u0648\u06cc\u062f \u06a9\u0647 \u0686\u0642\u062f\u0631 \u062e\u0648\u0628 \u0627\u0633\u062a \u06a9\u0647 \u0639\u0627\u0645\u0644 \u062f\u0631 \u06cc\u06a9 \u0648\u0636\u0639\u06cc\u062a \u062e\u0627\u0635 \u0628\u0627\u0634\u062f.  
\u0647\u0631 \u0686\u0647 \u0627\u0631\u0632\u0634 \u06cc\u06a9 \u062d\u0627\u0644\u062a \u0628\u0627\u0644\u0627\u062a\u0631 \u0628\u0627\u0634\u062f\u060c \u0645\u06cc\u0632\u0627\u0646 \u067e\u0627\u062f\u0627\u0634\u06cc \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0646\u062a\u0638\u0627\u0631 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u0645 \u0628\u06cc\u0634\u062a\u0631 \u0627\u0633\u062a:<\/p>\n<p>$$ v_\\pi (s) = \\mathbb{E}_\\pi (G_t|S_t = s) $$\n<\/p>\n<p>\u0646\u0627\u0645 \u0648\u0627\u0642\u0639\u06cc \u0627\u06cc\u0646 \u062a\u0627\u0628\u0639\u060c \u062a\u0627\u0628\u0639 <strong>\u062d\u0627\u0644\u062a-\u0627\u0631\u0632\u0634<\/strong> \u0627\u0633\u062a\u060c \u0628\u0631\u0627\u06cc \u0645\u062a\u0645\u0627\u06cc\u0632 \u06a9\u0631\u062f\u0646 \u0622\u0646 \u0627\u0632 \u06cc\u06a9 \u0639\u0646\u0635\u0631 \u0645\u0647\u0645 \u062f\u06cc\u06af\u0631 \u062f\u0631 RL: \u062a\u0627\u0628\u0639 <strong>\u0639\u0645\u0644-\u0627\u0631\u0632\u0634<\/strong>.<\/p>\n<p>\u062a\u0627\u0628\u0639 action-value \u0645\u0642\u062f\u0627\u0631\u060c \u06cc\u0639\u0646\u06cc \u0628\u0627\u0632\u06af\u0634\u062a \u0645\u0648\u0631\u062f \u0627\u0646\u062a\u0638\u0627\u0631 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 action <em>a<\/em> \u062f\u0631 \u06cc\u06a9 \u062d\u0627\u0644\u062a \u062e\u0627\u0635 <em>s<\/em> \u0628\u0647 \u0645\u0627 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<p>$$ q_\\pi (s, a) = \\mathbb{E}_\\pi (G_t|S_t = s, A_t = a) $$\n<\/p>\n<h3 id=\"policy\"><span class=\"ez-toc-section\" id=\"%d8%ae%d8%b7_%d9%85%d8%b4%db%8c\"><\/span>\u062e\u0637 \u0645\u0634\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0627\u06cc\u0646 \u062e\u0637 \u0645\u0634\u06cc \u0631\u0641\u062a\u0627\u0631 \u0646\u0645\u0627\u06cc\u0646\u062f\u0647 \u0645\u0627 \u0631\u0627 \u062f\u0631 MDP \u062a\u0639\u0631\u06cc\u0641 \u0645\u06cc 
\u06a9\u0646\u062f.<\/p>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u0631\u0633\u0645\u06cc\u060c \u0633\u06cc\u0627\u0633\u062a \u0647\u0627 \u0647\u0633\u062a\u0646\u062f <em>\u062a\u0648\u0632\u06cc\u0639 \u0628\u0631 \u0631\u0648\u06cc \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u062f\u0631 \u062d\u0627\u0644\u0627\u062a \u062f\u0627\u062f\u0647 \u0634\u062f\u0647<\/em>.  \u06cc\u06a9 \u0633\u06cc\u0627\u0633\u062a \u0628\u0647 \u0627\u062d\u062a\u0645\u0627\u0644 \u0627\u0646\u062c\u0627\u0645 \u0647\u0631 \u0627\u0642\u062f\u0627\u0645 \u0627\u0632 \u0622\u0646 \u062d\u0627\u0644\u062a \u062d\u0627\u0644\u062a \u0647\u0627 \u0631\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<p>$$ \\pi (a|s) = \\mathbb{P}(A_t = a|S_t=s) $$\n<\/p>\n<p>\u0647\u062f\u0641 \u0646\u0647\u0627\u06cc\u06cc RL \u06cc\u0627\u0641\u062a\u0646 \u06cc\u06a9 \u062e\u0637 \u0645\u0634\u06cc \u0628\u0647\u06cc\u0646\u0647 (\u06cc\u0627 \u0628\u0647 \u0627\u0646\u062f\u0627\u0632\u0647 \u06a9\u0627\u0641\u06cc \u062e\u0648\u0628) \u0628\u0631\u0627\u06cc \u0646\u0645\u0627\u06cc\u0646\u062f\u0647 \u0645\u0627 \u0627\u0633\u062a.  
\u062f\u0631 \u0645\u062b\u0627\u0644 \u0628\u0627\u0632\u06cc \u0648\u06cc\u062f\u06cc\u0648\u06cc\u06cc\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u062e\u0637\u200c\u0645\u0634\u06cc \u0631\u0627 \u0628\u0647\u200c\u0639\u0646\u0648\u0627\u0646 \u0627\u0633\u062a\u0631\u0627\u062a\u0698\u06cc\u200c\u0627\u06cc \u06a9\u0647 \u0628\u0627\u0632\u06cc\u06a9\u0646 \u062f\u0646\u0628\u0627\u0644 \u0645\u06cc\u200c\u06a9\u0646\u062f \u062f\u0631 \u0646\u0638\u0631 \u0628\u06af\u06cc\u0631\u06cc\u062f\u060c \u06cc\u0639\u0646\u06cc \u0627\u0642\u062f\u0627\u0645\u0627\u062a\u06cc \u06a9\u0647 \u0628\u0627\u0632\u06cc\u06a9\u0646 \u062f\u0631 \u0635\u0648\u0631\u062a \u0627\u0631\u0627\u0626\u0647 \u0633\u0646\u0627\u0631\u06cc\u0648\u0647\u0627\u06cc \u062e\u0627\u0635 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u062f\u0647\u062f.<\/p>\n<h2 id=\"mainapproaches\"><span class=\"ez-toc-section\" id=\"%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af%d9%87%d8%a7%db%8c_%d8%a7%d8%b5%d9%84%db%8c\"><\/span>\u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0627\u0635\u0644\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0645\u062f\u0644\u200c\u0647\u0627 \u0648 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0628\u0631\u0627\u06cc \u0645\u0633\u0627\u0626\u0644 RL \u0627\u0639\u0645\u0627\u0644 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f.<\/p>\n<p>\u0648\u0627\u0642\u0639\u0627\u060c <a rel=\"nofollow noopener\" target=\"_blank\" href=\"https:\/\/en.wikipedia.org\/wiki\/Reinforcement_learning#Comparison_of_reinforcement_learning_algorithms\"><strong>\u0632\u06cc\u0627\u062f<\/strong><\/a>.<\/p>\n<p>\u0628\u0627 \u0627\u06cc\u0646 \u062d\u0627\u0644\u060c \u0647\u0645\u0647 \u0622\u0646\u0647\u0627 \u06a9\u0645 \u0648 \u0628\u06cc\u0634 \u062f\u0631 \u062f\u0648 \u062f\u0633\u062a\u0647 \u0642\u0631\u0627\u0631 \u0645\u06cc \u06af\u06cc\u0631\u0646\u062f: 
<em>\u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0633\u06cc\u0627\u0633\u062a<\/em>\u060c \u0648 <em>\u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0627\u0631\u0632\u0634<\/em>.<\/p>\n<h3 id=\"policybasedapproach\"><span class=\"ez-toc-section\" id=\"%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af_%d9%85%d8%a8%d8%aa%d9%86%db%8c_%d8%a8%d8%b1_%d8%b3%db%8c%d8%a7%d8%b3%d8%aa\"><\/span>\u0631\u0648\u06cc\u06a9\u0631\u062f \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0633\u06cc\u0627\u0633\u062a<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062f\u0631 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0633\u06cc\u0627\u0633\u062a \u0628\u0647 RL\u060c \u0647\u062f\u0641 \u0645\u0627 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0647\u062a\u0631\u06cc\u0646 \u062e\u0637 \u0645\u0634\u06cc \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a.  \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u062e\u0637\u200c\u0645\u0634\u06cc \u0645\u0633\u062a\u0642\u06cc\u0645\u0627\u064b \u0628\u0647\u062a\u0631\u06cc\u0646 \u062d\u0631\u06a9\u062a \u0645\u0645\u06a9\u0646 \u0631\u0627 \u0627\u0632 \u0648\u0636\u0639\u06cc\u062a \u0641\u0639\u0644\u06cc \u06cc\u0627 \u062a\u0648\u0632\u06cc\u0639 \u0628\u0631 \u0631\u0648\u06cc \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0645\u0645\u06a9\u0646 \u0628\u0647\u200c\u062f\u0633\u062a \u0645\u06cc\u200c\u0622\u0648\u0631\u0646\u062f.<\/p>\n<h3 id=\"valuebasedapproach\"><span class=\"ez-toc-section\" id=\"%d8%b1%d9%88%db%8c%da%a9%d8%b1%d8%af_%d8%a7%d8%b1%d8%b2%d8%b4_%d9%85%d8%ad%d9%88%d8%b1\"><\/span>\u0631\u0648\u06cc\u06a9\u0631\u062f \u0627\u0631\u0632\u0634 \u0645\u062d\u0648\u0631<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u062f\u0631 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u0627\u0631\u0632\u0634\u060c \u0645\u0627 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u0645 \u062a\u0627\u0628\u0639 \u0645\u0642\u062f\u0627\u0631 
\u0628\u0647\u06cc\u0646\u0647 \u0631\u0627 \u067e\u06cc\u062f\u0627 \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u062a\u0627\u0628\u0639 \u0645\u0642\u062f\u0627\u0631 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0628\u0631 \u0631\u0648\u06cc \u0647\u0645\u0647 \u0633\u06cc\u0627\u0633\u062a\u200c\u0647\u0627 \u0627\u0633\u062a.<\/p>\n<p>\u0633\u067e\u0633 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0631 \u0627\u0633\u0627\u0633 \u0627\u0642\u062f\u0627\u0645\u0627\u062a\u06cc \u06a9\u0647 \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u0645 (\u06cc\u0639\u0646\u06cc \u0627\u0632 \u06a9\u062f\u0627\u0645 \u062e\u0637\u200c\u0645\u0634\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645) \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0646\u06cc\u0645 \u0631\u0648\u06cc \u0645\u0642\u0627\u062f\u06cc\u0631\u06cc \u06a9\u0647 \u0627\u0632 \u0645\u062f\u0644 \u062f\u0631\u06cc\u0627\u0641\u062a \u0645\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<h2 id=\"explorationvsexploitation\"><span class=\"ez-toc-section\" id=\"%d8%a7%da%a9%d8%aa%d8%b4%d8%a7%d9%81_%d8%af%d8%b1_%d9%85%d9%82%d8%a7%d8%a8%d9%84_%d8%a8%d9%87%d8%b1%d9%87_%d8%a8%d8%b1%d8%af%d8%a7%d8%b1%db%8c\"><\/span>\u0627\u06a9\u062a\u0634\u0627\u0641 \u062f\u0631 \u0645\u0642\u0627\u0628\u0644 \u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0627\u062f \u0648 \u0633\u062a\u062f \u0628\u06cc\u0646 <em>\u0627\u06a9\u062a\u0634\u0627\u0641<\/em> \u0648 <em>\u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc<\/em> \u0628\u0647 \u0637\u0648\u0631 \u06af\u0633\u062a\u0631\u062f\u0647 \u062f\u0631 \u0627\u062f\u0628\u06cc\u0627\u062a RL \u0645\u0648\u0631\u062f \u0645\u0637\u0627\u0644\u0639\u0647 \u0642\u0631\u0627\u0631 \u06af\u0631\u0641\u062a\u0647 \u0627\u0633\u062a.<\/p>\n<p>\u06a9\u0627\u0648\u0634 \u0628\u0647 \u0639\u0645\u0644 \u0628\u0627\u0632\u062f\u06cc\u062f \u0648 
\u062c\u0645\u0639\u200c\u0622\u0648\u0631\u06cc \u0627\u0637\u0644\u0627\u0639\u0627\u062a \u062f\u0631 \u0645\u0648\u0631\u062f \u0648\u0636\u0639\u06cc\u062a\u200c\u0647\u0627\u06cc\u06cc \u062f\u0631 \u0645\u062d\u06cc\u0637\u06cc \u06af\u0641\u062a\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u06a9\u0647 \u0647\u0646\u0648\u0632 \u0627\u0632 \u0622\u0646\u200c\u0647\u0627 \u0628\u0627\u0632\u062f\u06cc\u062f \u0646\u06a9\u0631\u062f\u0647\u200c\u0627\u06cc\u0645 \u06cc\u0627 \u0647\u0646\u0648\u0632 \u0627\u0637\u0644\u0627\u0639\u0627\u062a \u0632\u06cc\u0627\u062f\u06cc \u062f\u0631\u0628\u0627\u0631\u0647 \u0622\u0646\u200c\u0647\u0627 \u0646\u062f\u0627\u0631\u06cc\u0645.  \u0627\u06cc\u062f\u0647 \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0631\u0633\u06cc MDP \u0645\u0627 \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0645\u0627 \u0631\u0627 \u0628\u0647 \u062a\u0635\u0645\u06cc\u0645\u0627\u062a \u0628\u0647\u062a\u0631 \u062f\u0631 \u0622\u06cc\u0646\u062f\u0647 \u0647\u062f\u0627\u06cc\u062a \u06a9\u0646\u062f.<\/p>\n<p>\u0627\u0632 \u0637\u0631\u0641 \u062f\u06cc\u06af\u0631\u060c \u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc \u0634\u0627\u0645\u0644 \u06af\u0631\u0641\u062a\u0646 \u0628\u0647\u062a\u0631\u06cc\u0646 \u062a\u0635\u0645\u06cc\u0645 \u0628\u0627 \u062a\u0648\u062c\u0647 \u0628\u0647 \u062f\u0627\u0646\u0634 \u0641\u0639\u0644\u06cc \u0627\u0633\u062a \u06a9\u0647 \u062f\u0631 \u062d\u0628\u0627\u0628 \u0622\u0646\u0686\u0647 \u0642\u0628\u0644\u0627 \u0634\u0646\u0627\u062e\u062a\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a \u0631\u0627\u062d\u062a \u0627\u0633\u062a.<\/p>\n<p>\u062f\u0631 \u0645\u062b\u0627\u0644 \u0632\u06cc\u0631 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f \u06a9\u0647 \u0686\u06af\u0648\u0646\u0647 \u0627\u06cc\u0646 \u0645\u0641\u0627\u0647\u06cc\u0645 \u062f\u0631 \u06cc\u06a9 \u0645\u0633\u0626\u0644\u0647 \u0648\u0627\u0642\u0639\u06cc 
\u06a9\u0627\u0631\u0628\u0631\u062f \u062f\u0627\u0631\u0646\u062f.<\/p>\n<h2 id=\"amultiarmedbandit\"><span class=\"ez-toc-section\" id=\"%db%8c%da%a9_%d8%b1%d8%a7%d9%87%d8%b2%d9%86_%da%86%d9%86%d8%af_%d9%85%d8%b3%d9%84%d8%ad\"><\/span>\u06cc\u06a9 \u0631\u0627\u0647\u0632\u0646 \u0686\u0646\u062f \u0645\u0633\u0644\u062d<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0645\u0627 \u0627\u06a9\u0646\u0648\u0646 \u0628\u0647 \u06cc\u06a9 \u0645\u062b\u0627\u0644 \u0639\u0645\u0644\u06cc \u0627\u0632 \u06cc\u06a9 \u0645\u0633\u0626\u0644\u0647 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0646\u06af\u0627\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 &#8211; \u0645\u0633\u0626\u0644\u0647 <em>\u0631\u0627\u0647\u0632\u0646 \u0686\u0646\u062f \u0645\u0633\u0644\u062d<\/em>.<\/p>\n<p>\u0631\u0627\u0647\u0632\u0646 \u0686\u0646\u062f \u0645\u0633\u0644\u062d \u06cc\u06a9\u06cc \u0627\u0632 \u0645\u062d\u0628\u0648\u0628 \u062a\u0631\u06cc\u0646 \u0645\u0634\u06a9\u0644\u0627\u062a \u062f\u0631 RL \u0627\u0633\u062a:<\/p>\n<blockquote>\n<p><em>\u0634\u0645\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0645\u06a9\u0631\u0631 \u0628\u0627 \u06cc\u06a9 \u0627\u0646\u062a\u062e\u0627\u0628 \u0627\u0632 \u0645\u06cc\u0627\u0646 k \u06af\u0632\u06cc\u0646\u0647 \u06cc\u0627 \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u0645\u062e\u062a\u0644\u0641 \u0631\u0648\u0628\u0631\u0648 \u0647\u0633\u062a\u06cc\u062f.  
\u067e\u0633 \u0627\u0632 \u0647\u0631 \u0627\u0646\u062a\u062e\u0627\u0628\u060c \u06cc\u06a9 \u067e\u0627\u062f\u0627\u0634 \u0639\u062f\u062f\u06cc \u0627\u0646\u062a\u062e\u0627\u0628 \u0634\u062f\u0647 \u0627\u0632 \u062a\u0648\u0632\u06cc\u0639 \u0627\u062d\u062a\u0645\u0627\u0644 \u062b\u0627\u0628\u062a \u062f\u0631\u06cc\u0627\u0641\u062a \u0645\u06cc \u06a9\u0646\u06cc\u062f \u06a9\u0647 \u0628\u0633\u062a\u06af\u06cc \u062f\u0627\u0631\u062f \u0631\u0648\u06cc \u0627\u0642\u062f\u0627\u0645\u06cc \u06a9\u0647 \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0631\u062f\u06cc\u062f  \u0647\u062f\u0641 \u0634\u0645\u0627 \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u06a9\u0644 \u067e\u0627\u062f\u0627\u0634 \u0645\u0648\u0631\u062f \u0627\u0646\u062a\u0638\u0627\u0631 \u0631\u0627 \u062f\u0631 \u06cc\u06a9 \u062f\u0648\u0631\u0647 \u0632\u0645\u0627\u0646\u06cc \u0628\u0647 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0628\u0631\u0633\u0627\u0646\u06cc\u062f\u060c \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u0628\u06cc\u0634 \u0627\u0632 1000 \u0627\u0646\u062a\u062e\u0627\u0628 \u0627\u0642\u062f\u0627\u0645 \u06cc\u0627 \u0645\u0631\u062d\u0644\u0647 \u0632\u0645\u0627\u0646\u06cc.<\/em><\/p>\n<\/blockquote>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646 \u0631\u0627 \u062f\u0631 \u0642\u06cc\u0627\u0633 \u0628\u0627 \u06cc\u06a9 \u0645\u0627\u0634\u06cc\u0646 \u0627\u0633\u0644\u0627\u062a (\u06cc\u06a9 \u0631\u0627\u0647\u0632\u0646 \u06cc\u06a9 \u062f\u0633\u062a) \u062f\u0631 \u0646\u0638\u0631 \u0628\u06af\u06cc\u0631\u06cc\u062f.  
\u0647\u0631 \u0627\u0646\u062a\u062e\u0627\u0628 \u0627\u06a9\u0634\u0646 \u0645\u0627\u0646\u0646\u062f \u0628\u0627\u0632\u06cc \u06cc\u06a9\u06cc \u0627\u0632 \u0627\u0647\u0631\u0645\u200c\u0647\u0627\u06cc \u062f\u0633\u062a\u06af\u0627\u0647 \u0627\u0633\u0644\u0627\u062a \u0627\u0633\u062a \u0648 \u067e\u0627\u062f\u0627\u0634\u200c\u0647\u0627\u06cc\u06cc \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0631\u0633\u06cc\u062f\u0646 \u0628\u0647 \u062c\u06a9\u200c\u067e\u0627\u062a \u0628\u0647 \u062f\u0633\u062a \u0645\u06cc\u200c\u0622\u06cc\u0646\u062f.<\/p>\n<p><img decoding=\"async\" class=\"img-responsive\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/introduction-to-reinforcement-learning-with-python-2.jpg\" alt=\"\u0645\u0627\u0634\u06cc\u0646 \u0631\u0627\u0647\u0632\u0646\" title=\"\"><\/p>\n<p>\u062d\u0644 \u0627\u06cc\u0646 \u0645\u0634\u06a9\u0644 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0628\u0647 \u06cc\u06a9 \u0628\u0647\u06cc\u0646\u0647 \u0628\u0631\u0633\u06cc\u0645 <em>\u062e\u0637 \u0645\u0634\u06cc<\/em>: \u0627\u0633\u062a\u0631\u0627\u062a\u0698\u06cc \u06a9\u0647 \u0628\u0647 \u0645\u0627 \u0627\u0645\u06a9\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u062f\u0631 \u0647\u0631 \u0645\u0631\u062d\u0644\u0647 \u0632\u0645\u0627\u0646\u06cc \u0628\u0647\u062a\u0631\u06cc\u0646 \u0627\u0642\u062f\u0627\u0645 \u0645\u0645\u06a9\u0646 (\u0622\u0646\u06cc \u06a9\u0647 \u0628\u0627\u0644\u0627\u062a\u0631\u06cc\u0646 \u0628\u0627\u0632\u062f\u0647 \u0645\u0648\u0631\u062f \u0627\u0646\u062a\u0638\u0627\u0631 \u0631\u0627 \u062f\u0627\u0631\u062f) \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0646\u06cc\u0645.<\/p>\n<h3 id=\"actionvaluemethods\"><span class=\"ez-toc-section\" 
id=\"%d8%b1%d9%88%d8%b4%e2%80%8c%d9%87%d8%a7%db%8c_%d8%b9%d9%85%d9%84_%d8%a7%d8%b1%d8%b2%d8%b4\"><\/span>\u0631\u0648\u0634\u200c\u0647\u0627\u06cc \u0639\u0645\u0644 \u0627\u0631\u0632\u0634<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u06cc\u06a9 \u0631\u0627\u0647 \u062d\u0644 \u0628\u0633\u06cc\u0627\u0631 \u0633\u0627\u062f\u0647 \u0627\u0633\u062a \u0631\u0648\u06cc \u062a\u0627\u0628\u0639 \u0645\u0642\u062f\u0627\u0631 \u0639\u0645\u0644  \u0628\u0647 \u06cc\u0627\u062f \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f \u06a9\u0647 \u06cc\u06a9 \u0645\u0642\u062f\u0627\u0631 \u0639\u0645\u0644\u060c \u067e\u0627\u062f\u0627\u0634 \u0645\u06cc\u0627\u0646\u06af\u06cc\u0646 \u0632\u0645\u0627\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0622\u0646 \u0639\u0645\u0644 \u0627\u0646\u062a\u062e\u0627\u0628 \u0645\u06cc \u0634\u0648\u062f:<\/p>\n<p>$$ q(a) = E(R_t \\mid A=a) $$\n<\/p>\n<p>\u0645\u0627 \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u062a\u062e\u0645\u06cc\u0646 \u0628\u0632\u0646\u06cc\u0645 <strong>q<\/strong> \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <em>\u0645\u06cc\u0627\u0646\u06af\u06cc\u0646 \u0646\u0645\u0648\u0646\u0647<\/em>:<\/p>\n<p>$$ Q_t(a) = \\frac{\\text{\u0645\u062c\u0645\u0648\u0639 \u062c\u0648\u0627\u06cc\u0632 \u0632\u0645\u0627\u0646\u06cc \u06a9\u0647 &#8220;a&#8221; \u0642\u0628\u0644 \u0627\u0632 &#8220;t&#8221; \u06af\u0631\u0641\u062a\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a}}{\\text{\u062a\u0639\u062f\u0627\u062f \u062f\u0641\u0639\u0627\u062a\u06cc \u06a9\u0647 &#8220;a&#8221; \u0642\u0628\u0644 \u0627\u0632 &#8220;t&#8221; \u06af\u0631\u0641\u062a\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a}} $$\n<\/p>\n<p>\u0627\u06af\u0631 \u0645\u0634\u0627\u0647\u062f\u0627\u062a \u06a9\u0627\u0641\u06cc \u062c\u0645\u0639 \u0622\u0648\u0631\u06cc \u06a9\u0646\u06cc\u0645\u060c \u062a\u062e\u0645\u06cc\u0646 \u0645\u0627 \u0628\u0647 
\u0627\u0646\u062f\u0627\u0632\u0647 \u06a9\u0627\u0641\u06cc \u0628\u0647 \u062a\u0627\u0628\u0639 \u0648\u0627\u0642\u0639\u06cc \u0646\u0632\u062f\u06cc\u06a9 \u0645\u06cc \u0634\u0648\u062f.  \u0633\u067e\u0633 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u062f\u0631 \u0647\u0631 \u0645\u0631\u062d\u0644\u0647 \u062d\u0631\u06cc\u0635\u0627\u0646\u0647 \u0639\u0645\u0644 \u06a9\u0646\u06cc\u0645\u060c \u06cc\u0639\u0646\u06cc \u0627\u0642\u062f\u0627\u0645\u06cc \u0631\u0627 \u0628\u0627 \u0628\u0627\u0644\u0627\u062a\u0631\u06cc\u0646 \u0627\u0631\u0632\u0634 \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0646\u06cc\u0645 \u062a\u0627 \u0628\u0627\u0644\u0627\u062a\u0631\u06cc\u0646 \u067e\u0627\u062f\u0627\u0634 \u0645\u0645\u06a9\u0646 \u0631\u0627 \u062c\u0645\u0639 \u0622\u0648\u0631\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<h3 id=\"dontbetoogreedy\"><span class=\"ez-toc-section\" id=\"%d8%b2%db%8c%d8%a7%d8%af_%d8%ad%d8%b1%db%8c%d8%b5_%d9%86%d8%a8%d8%a7%d8%b4\"><\/span>\u0632\u06cc\u0627\u062f \u062d\u0631\u06cc\u0635 \u0646\u0628\u0627\u0634<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u0632\u0645\u0627\u0646\u06cc \u0631\u0627 \u0628\u0647 \u062e\u0627\u0637\u0631 \u0645\u06cc \u0622\u0648\u0631\u06cc\u062f \u06a9\u0647 \u0627\u0632 \u0645\u0628\u0627\u062f\u0644\u0647 \u0628\u06cc\u0646 \u0627\u06a9\u062a\u0634\u0627\u0641 \u0648 \u0628\u0647\u0631\u0647 \u0628\u0631\u062f\u0627\u0631\u06cc \u0635\u062d\u0628\u062a \u06a9\u0631\u062f\u06cc\u0645\u061f  \u0627\u06cc\u0646 \u06cc\u06a9 \u0646\u0645\u0648\u0646\u0647 \u0627\u0632 \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0686\u0631\u0627 \u0628\u0627\u06cc\u062f \u0628\u0647 \u0622\u0646 \u0627\u0647\u0645\u06cc\u062a \u062f\u0647\u06cc\u0645.<\/p>\n<p>\u062f\u0631 \u0648\u0627\u0642\u0639\u060c \u0627\u06af\u0631 \u0647\u0645\u0627\u0646\u0637\u0648\u0631 \u06a9\u0647 \u062f\u0631 \u067e\u0627\u0631\u0627\u06af\u0631\u0627\u0641 \u0642\u0628\u0644 
\u067e\u06cc\u0634\u0646\u0647\u0627\u062f \u0634\u062f\u060c \u0647\u0645\u06cc\u0634\u0647 \u062d\u0631\u06cc\u0635\u0627\u0646\u0647 \u0639\u0645\u0644 \u06a9\u0646\u06cc\u0645\u060c \u0647\u0631\u06af\u0632 \u0627\u0642\u062f\u0627\u0645\u0627\u062a \u063a\u06cc\u0631\u0628\u0647\u06cc\u0646\u0647 \u0631\u0627 \u0627\u0645\u062a\u062d\u0627\u0646 \u0646\u0645\u06cc \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u062f\u0631 \u0646\u0647\u0627\u06cc\u062a \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0628\u0647 \u0646\u062a\u0627\u06cc\u062c \u0628\u0647\u062a\u0631\u06cc \u0645\u0646\u062c\u0631 \u0634\u0648\u062f.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0645\u0639\u0631\u0641\u06cc \u062f\u0631\u062c\u0647 \u0627\u06cc \u0627\u0632 \u06a9\u0627\u0648\u0634 \u062f\u0631 \u0631\u0627\u0647 \u062d\u0644 \u062e\u0648\u062f\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u06cc\u06a9 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645 <strong>\u03b5-\u0637\u0645\u0639<\/strong> \u0627\u0633\u062a\u0631\u0627\u062a\u0698\u06cc: \u0645\u0627 \u062f\u0631 \u0628\u06cc\u0634\u062a\u0631 \u0645\u0648\u0627\u0642\u0639 \u0627\u0642\u062f\u0627\u0645\u0627\u062a\u06cc \u0631\u0627 \u0628\u0627 \u062d\u0631\u0635 \u0627\u0646\u062a\u062e\u0627\u0628 \u0645\u06cc \u06a9\u0646\u06cc\u0645\u060c \u0627\u0645\u0627 \u0647\u0631 \u0686\u0646\u062f \u0648\u0642\u062a \u06cc\u06a9\u0628\u0627\u0631\u060c \u0628\u0627 \u0627\u062d\u062a\u0645\u0627\u0644 <strong>\u03b5<\/strong>\u060c \u06cc\u06a9 \u0639\u0645\u0644 \u062a\u0635\u0627\u062f\u0641\u06cc \u0631\u0627 \u0628\u062f\u0648\u0646 \u062a\u0648\u062c\u0647 \u0628\u0647 \u0645\u0642\u0627\u062f\u06cc\u0631 \u0639\u0645\u0644 \u0627\u0646\u062a\u062e\u0627\u0628 \u0645\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u0628\u0647 \u0646\u0638\u0631 \u0645\u06cc \u0631\u0633\u062f \u06a9\u0647 \u0627\u06cc\u0646 \u0631\u0648\u0634 \u0627\u06a9\u062a\u0634\u0627\u0641 \u0633\u0627\u062f\u0647 
\u0628\u0633\u06cc\u0627\u0631 \u062e\u0648\u0628 \u0639\u0645\u0644 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u067e\u0627\u062f\u0627\u0634 \u0647\u0627\u06cc \u062f\u0631\u06cc\u0627\u0641\u062a\u06cc \u0645\u0627 \u0631\u0627 \u0628\u0647 \u0645\u06cc\u0632\u0627\u0646 \u0642\u0627\u0628\u0644 \u062a\u0648\u062c\u0647\u06cc \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0647\u062f.<\/p>\n<p>\u06cc\u06a9 \u0646\u06a9\u062a\u0647 \u0622\u062e\u0631 &#8211; \u0628\u0631\u0627\u06cc \u062c\u0644\u0648\u06af\u06cc\u0631\u06cc \u0627\u0632 \u06af\u0631\u0627\u0646 \u0634\u062f\u0646 \u0631\u0627\u0647 \u062d\u0644 \u0645\u0627 \u0627\u0632 \u0646\u0638\u0631 \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc\u060c \u0645\u06cc\u0627\u0646\u06af\u06cc\u0646 \u0631\u0627 \u0628\u0647 \u0635\u0648\u0631\u062a \u062a\u062f\u0631\u06cc\u062c\u06cc \u0637\u0628\u0642 \u0627\u06cc\u0646 \u0641\u0631\u0645\u0648\u0644 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645:<\/p>\n<p>$$ Q_{n+1} = Q_n + \\frac{1}{n}(R_n - Q_n) $$\n<\/p>\n<h3 id=\"pythonsolutionwalkthrough\"><span class=\"ez-toc-section\" id=\"%d8%b1%d8%a7%d9%87_%d8%ad%d9%84_%d9%be%d8%a7%db%8c%d8%aa%d9%88%d9%86\"><\/span>\u0631\u0627\u0647 \u062d\u0644 \u067e\u0627\u06cc\u062a\u0648\u0646<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">import<\/span> numpy <span class=\"hljs-keyword\">as<\/span> np\n\n\nk = <span class=\"hljs-number\">3<\/span>\n\n\nQ = [<span class=\"hljs-number\">0<\/span> <span class=\"hljs-keyword\">for<\/span> _ <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">range<\/span>(k)]\n\n\nN = [<span class=\"hljs-number\">0<\/span> <span class=\"hljs-keyword\">for<\/span> _ <span class=\"hljs-keyword\">in<\/span> <span class=\"hljs-built_in\">range<\/span>(k)]\n\n\neps = <span class=\"hljs-number\">0.1<\/span>\n\n\np_bandits = [<span 
class=\"hljs-number\">0.45<\/span>, <span class=\"hljs-number\">0.40<\/span>, <span class=\"hljs-number\">0.80<\/span>]\n\n<span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">pull<\/span>(<span class=\"hljs-params\">a<\/span>):<\/span>\n    <span class=\"hljs-string\">\"\"\"Pull arm of bandit with index `a` and return 1 if win, \n    else return 0.\"\"\"<\/span>\n    <span class=\"hljs-keyword\">if<\/span> np.random.rand() &lt; p_bandits[a]:\n        <span class=\"hljs-keyword\">return<\/span> <span class=\"hljs-number\">1<\/span>\n    <span class=\"hljs-keyword\">else<\/span>:\n        <span class=\"hljs-keyword\">return<\/span> <span class=\"hljs-number\">0<\/span>\n\n<span class=\"hljs-keyword\">while<\/span> <span class=\"hljs-literal\">True<\/span>:\n    <span class=\"hljs-keyword\">if<\/span> np.random.rand() &gt; eps:\n        \n        a = np.argmax(Q)\n    <span class=\"hljs-keyword\">else<\/span>:\n        \n        a = np.random.randint(<span class=\"hljs-number\">0<\/span>, k)\n    \n    \n    reward = pull(a)\n    \n    \n    N[a] += <span class=\"hljs-number\">1<\/span>\n    Q[a] += <span class=\"hljs-number\">1<\/span>\/N[a] * (reward - Q[a])\n<\/code><\/pre>\n<p><em>\u0648 voil\u00e0!<\/em> \u0627\u06af\u0631 \u0627\u06cc\u0646 \u0627\u0633\u06a9\u0631\u06cc\u067e\u062a \u0631\u0627 \u0628\u0631\u0627\u06cc \u0686\u0646\u062f \u062b\u0627\u0646\u06cc\u0647 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u0645\u060c \u0627\u0632 \u0642\u0628\u0644 \u0645\u06cc \u0628\u06cc\u0646\u06cc\u0645 \u06a9\u0647 \u0645\u0642\u0627\u062f\u06cc\u0631 \u0639\u0645\u0644 \u0645\u0627 \u0628\u0627 \u0627\u062d\u062a\u0645\u0627\u0644 \u0636\u0631\u0628\u0647 \u0632\u062f\u0646 \u0628\u0647 \u062c\u06a9\u067e\u0627\u062a \u0628\u0631\u0627\u06cc \u0631\u0627\u0647\u0632\u0646\u0627\u0646 \u0645\u0627 \u0645\u062a\u0646\u0627\u0633\u0628 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\">0.4406301434281669, 
\n0.39131455399060977, \n0.8008844354479673\n<\/code><\/pre>\n<p>\u0627\u06cc\u0646 \u0628\u062f\u0627\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0633\u06cc\u0627\u0633\u062a \u062d\u0631\u06cc\u0635\u0627\u0646\u0647 \u0645\u0627 \u0628\u0647 \u062f\u0631\u0633\u062a\u06cc \u0628\u0647 \u0646\u0641\u0639 \u0627\u0642\u062f\u0627\u0645\u0627\u062a\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u0645 \u0627\u0632 \u0622\u0646\u0647\u0627 \u0627\u0646\u062a\u0638\u0627\u0631 \u067e\u0627\u062f\u0627\u0634 \u0628\u06cc\u0634\u062a\u0631\u06cc \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u0645.<\/p>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u06cc\u06a9 \u0632\u0645\u06cc\u0646\u0647 \u0631\u0648 \u0628\u0647 \u0631\u0634\u062f \u0627\u0633\u062a \u0648 \u0645\u0648\u0627\u0631\u062f \u0632\u06cc\u0627\u062f\u06cc \u0628\u0631\u0627\u06cc \u067e\u0648\u0634\u0634 \u062f\u0627\u062f\u0646 \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f.  
\u062f\u0631 \u0648\u0627\u0642\u0639\u060c \u0645\u0627 \u0647\u0646\u0648\u0632 \u0628\u0647 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645\u200c\u0647\u0627 \u0648 \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u0647\u0645\u0647 \u0645\u0646\u0638\u0648\u0631\u0647 (\u0645\u062b\u0644\u0627\u064b \u0628\u0631\u0646\u0627\u0645\u0647\u200c\u0646\u0648\u06cc\u0633\u06cc \u067e\u0648\u06cc\u0627\u060c \u0645\u0648\u0646\u062a \u06a9\u0627\u0631\u0644\u0648\u060c \u062a\u0641\u0627\u0648\u062a \u0632\u0645\u0627\u0646\u06cc) \u0646\u06af\u0627\u0647 \u0646\u06a9\u0631\u062f\u0647\u200c\u0627\u06cc\u0645.<\/p>\n<p>\u062f\u0631 \u062d\u0627\u0644 \u062d\u0627\u0636\u0631 \u0645\u0647\u0645\u062a\u0631\u06cc\u0646 \u0686\u06cc\u0632 \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0628\u0627 \u0645\u0641\u0627\u0647\u06cc\u0645\u06cc \u0645\u0627\u0646\u0646\u062f \u062a\u0648\u0627\u0628\u0639 \u0627\u0631\u0632\u0634\u060c \u0633\u06cc\u0627\u0633\u062a \u0647\u0627 \u0648 MDP \u0647\u0627 \u0622\u0634\u0646\u0627 \u0634\u0648\u06cc\u062f.  
\u062f\u0631 \u0628\u062e\u0634 <em>\u0645\u0646\u0627\u0628\u0639<\/em> \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647\u060c \u0645\u0646\u0627\u0628\u0639 \u0628\u0633\u06cc\u0627\u0631 \u062e\u0648\u0628\u06cc \u0628\u0631\u0627\u06cc \u0628\u0647 \u062f\u0633\u062a \u0622\u0648\u0631\u062f\u0646 \u062f\u0631\u06a9 \u0639\u0645\u06cc\u0642 \u062a\u0631 \u0627\u0632 \u0627\u06cc\u0646 \u0646\u0648\u0639 \u0645\u0637\u0627\u0644\u0628 \u067e\u06cc\u062f\u0627 \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f.<\/p>\n<h2 id=\"resources\"><span class=\"ez-toc-section\" id=\"%d9%85%d9%86%d8%a7%d8%a8%d8%b9\"><\/span>\u0645\u0646\u0627\u0628\u0639<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=[];t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)[0];\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 \u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-23 01:40:10<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    
data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;16275&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u0645\u0642\u062f\u0645\u0647 \u0627\u06cc \u0628\u0631 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0628\u0627 \u067e\u0627\u06cc\u062a\u0648\u0646&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" 
style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 7<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u0645\u0639\u0631\u0641\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0642\u0648\u06cc\u062a\u06cc \u0642\u0637\u0639\u0627 \u06cc\u06a9\u06cc \u0627\u0632 \u0641\u0639\u0627\u0644 \u062a\u0631\u06cc\u0646 \u0648 \u0645\u062d\u0631\u06a9 
\u062a\u0631\u06cc\u0646 \u0632\u0645\u06cc\u0646\u0647 \u0647\u0627\u06cc \u062a\u062d\u0642\u06cc\u0642 \u062f\u0631 \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc \u0627\u0633\u062a. \u0639\u0644\u0627\u0642\u0647 \u0628\u0647 \u0627\u06cc\u0646 \u0632\u0645\u06cc\u0646\u0647 \u062f\u0631 \u0637\u06cc \u0686\u0646\u062f \u0633\u0627\u0644 \u06af\u0630\u0634\u062a\u0647\u060c \u0628\u0647 \u062f\u0646\u0628\u0627\u0644 \u067e\u06cc\u0634\u0631\u0641\u062a\u200c\u0647\u0627\u06cc \u0628\u0632\u0631\u06af (\u0648 \u062a\u0628\u0644\u06cc\u063a\u0627\u062a\u06cc \u0628\u0633\u06cc\u0627\u0631) \u0645\u0627\u0646\u0646\u062f \u0634\u06a9\u0633\u062a \u062f\u0627\u062f\u0646 \u0642\u0647\u0631\u0645\u0627\u0646 \u062c\u0647\u0627\u0646 GO \u062a\u0648\u0633\u0637 AlphaGo \u0634\u0631\u06a9\u062a DeepMind \u0648 \u0634\u06a9\u0633\u062a \u062f\u0627\u062f\u0646 \u0628\u0627\u0632\u06cc\u06a9\u0646\u0627\u0646 \u062d\u0631\u0641\u0647 \u0627\u06cc DOTA \u062a\u0648\u0633\u0637 \u0645\u062f\u0644 \u0647\u0627\u06cc \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc OpenAI\u060c \u0628\u0647 \u0634\u062f\u062a \u0627\u0641\u0632\u0627\u06cc\u0634 \u06cc\u0627\u0641\u062a\u0647 \u0627\u0633\u062a. \u0628\u0647 \u0644\u0637\u0641 
[&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":16276,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620,1686],"tags":[],"class_list":["post-16275","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming","category-ai"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16275","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=16275"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/16275\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/16276"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=16275"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=16275"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=16275"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}