{"id":13890,"date":"2024-01-03T11:14:15","date_gmt":"2024-01-03T07:44:15","guid":{"rendered":"https:\/\/rasanegar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/"},"modified":"2024-01-03T11:14:15","modified_gmt":"2024-01-03T07:44:15","slug":"%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/","title":{"rendered":"\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633 \u062f\u0631 Keras\/TensorFlow"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/#%da%af%d8%b1%d9%85_%da%a9%d8%b1%d8%af%d9%86_%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c\" >\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 
\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/rasanegaar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/#%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%a8%d8%a7_keras_callbacks\" >\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 Keras Callbacks<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/#%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%a8%d8%a7_%d8%b2%db%8c%d8%b1_%da%a9%d9%84%d8%a7%d8%b3_learningrateschedule\" >\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 LearningRateSchedule<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/rasanegaar.com\/blog\/%da%af%d8%b1%d9%85-%da%a9%d8%b1%d8%af%d9%86-%d9%86%d8%b1%d8%ae-%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c-%d8%a8%d8%a7-%d9%88%d8%a7%d9%be%d8%a7%d8%b4%db%8c-%da%a9%d8%b3%db%8c%d9%86%d9%88%d8%b3-%d8%af\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 7<\/span> <span class=\"rt-label 
rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<div><noscript><\/noscript><\/p>\n<p>\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u06cc\u06a9 \u0641\u0631\u0627\u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u0647\u0645 \u062f\u0631 \u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0627\u0633\u062a &#8211; \u0648 \u0645\u0633\u062a\u0642\u06cc\u0645\u0627\u064b \u0622\u0646 \u0631\u0627 \u062f\u06cc\u06a9\u062a\u0647 \u0645\u06cc \u06a9\u0646\u062f <em>\u062f\u0631\u062c\u0647<\/em> \u06a9\u0647 \u0628\u0647\u200c\u0631\u0648\u0632\u0631\u0633\u0627\u0646\u06cc\u200c\u0647\u0627\u06cc\u06cc \u0628\u0631\u0627\u06cc \u0648\u0632\u0646\u200c\u0647\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u06a9\u0647 \u062a\u062e\u0645\u06cc\u0646 \u0632\u062f\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u0628\u0631\u062e\u06cc \u0627\u0632 \u0639\u0645\u0644\u06a9\u0631\u062f\u0647\u0627\u06cc \u062a\u0644\u0641\u0627\u062a \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0631\u0627 \u0628\u0647 \u062d\u062f\u0627\u0642\u0644 \u0628\u0631\u0633\u0627\u0646\u062f.  \u062f\u0631 SGD:<\/p>\n<p>$$<br \/>weight_{t+1} = weight_t - lr * \\frac{derror}{dweight_t}<br \/>$$<\/p>\n<p>\u0628\u0627 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0632 <code>0<\/code>\u060c \u0648\u0632\u0646 \u0628\u0647 \u0631\u0648\u0632 \u0634\u062f\u0647 \u0628\u0647 \u062e\u0648\u062f \u0628\u0627\u0632\u06af\u0634\u062a\u0647 \u0627\u0633\u062a &#8211; <em>weight<sub>t<\/sub><\/em>.  
\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u0644\u0627\u064b \u06a9\u0644\u06cc\u062f\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0622\u0646 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0641\u0639\u0627\u0644 \u06cc\u0627 \u063a\u06cc\u0631\u0641\u0639\u0627\u0644 \u06a9\u0631\u062f\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0686\u0631\u062e\u0627\u0646\u06cc\u0645\u060c \u0648 \u0628\u0627 \u06a9\u0646\u062a\u0631\u0644 \u0645\u0633\u062a\u0642\u06cc\u0645 \u062f\u0631\u062c\u0647 \u0628\u0647\u200c\u0631\u0648\u0632\u0631\u0633\u0627\u0646\u06cc \u0648\u0632\u0646\u060c \u062a\u0623\u062b\u06cc\u0631 \u0639\u0645\u062f\u0647\u200c\u0627\u06cc \u0628\u0631 \u0645\u06cc\u0632\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062f\u0631 \u062d\u0627\u0644 \u0648\u0642\u0648\u0639 \u062f\u0627\u0631\u062f.<\/p>\n<p>\u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0627\u0632 \u0646\u0631\u062e \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u062a\u0641\u0627\u0648\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u0646\u062f &#8211; \u0627\u0645\u0627 \u0645\u0641\u0647\u0648\u0645 \u0627\u0633\u0627\u0633\u06cc \u06cc\u06a9\u0633\u0627\u0646 \u0628\u0627\u0642\u06cc \u0645\u06cc \u0645\u0627\u0646\u062f.  
\u0646\u06cc\u0627\u0632\u06cc \u0628\u0647 \u06af\u0641\u062a\u0646 \u0646\u06cc\u0633\u062a \u06a9\u0647 \u0645\u06cc\u0632\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0648\u0636\u0648\u0639 \u0628\u0633\u06cc\u0627\u0631\u06cc \u0627\u0632 \u0645\u0637\u0627\u0644\u0639\u0627\u062a\u060c \u0645\u0642\u0627\u0644\u0627\u062a \u0648 \u0645\u0639\u06cc\u0627\u0631\u0647\u0627\u06cc \u067e\u0632\u0634\u06a9\u0627\u0646 \u0628\u0648\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<blockquote>\n<p>\u0628\u0647 \u0637\u0648\u0631 \u06a9\u0644\u06cc\u060c \u062a\u0642\u0631\u06cc\u0628\u0627\u064b \u0647\u0645\u0647 \u0645\u0648\u0627\u0641\u0642\u0646\u062f \u06a9\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062b\u0627\u0628\u062a \u0622\u0646 \u0631\u0627 \u06a9\u0627\u0647\u0634 \u0646\u0645\u06cc\u200c\u062f\u0647\u062f\u060c \u0648 \u0646\u0648\u0639\u06cc \u06a9\u0627\u0647\u0634 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062f\u0631 \u0627\u06a9\u062b\u0631 \u062a\u06a9\u0646\u06cc\u06a9\u200c\u0647\u0627\u06cc\u06cc \u0627\u062a\u0641\u0627\u0642 \u0645\u06cc\u200c\u0627\u0641\u062a\u062f \u06a9\u0647 \u0645\u06cc\u0632\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u062f\u0631 \u0637\u0648\u0644 \u062a\u0645\u0631\u06cc\u0646 \u062a\u0646\u0638\u06cc\u0645 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f &#8211; \u0686\u0647 \u06cc\u06a9\u0646\u0648\u0627\u062e\u062a\u060c \u06a9\u0633\u06cc\u0646\u0648\u0633\u060c \u0645\u062b\u0644\u062b\u06cc \u06cc\u0627 \u0627\u0646\u0648\u0627\u0639 \u062f\u06cc\u06af\u0631. 
\u06a9\u0627\u0647\u0634.<\/p>\n<\/blockquote>\n<p>\u062a\u06a9\u0646\u06cc\u06a9\u06cc \u06a9\u0647 \u062f\u0631 \u0633\u0627\u0644\u0647\u0627\u06cc \u0627\u062e\u06cc\u0631 \u062c\u0627\u06cc \u067e\u0627\u06cc \u062e\u0648\u062f \u0631\u0627 \u067e\u06cc\u062f\u0627 \u06a9\u0631\u062f\u0647 \u0627\u0633\u062a <em>\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0645\u06cc\u0632\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc<\/em>\u060c \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0639\u0645\u0644\u0627\u064b \u0628\u0627 \u0647\u0631 \u062a\u06a9\u0646\u06cc\u06a9 \u06a9\u0627\u0647\u0634 \u062f\u06cc\u06af\u0631\u06cc \u062c\u0641\u062a \u0634\u0648\u062f.<\/p>\n<h2 id=\"learningratewarmup\"><span class=\"ez-toc-section\" id=\"%da%af%d8%b1%d9%85_%da%a9%d8%b1%d8%af%d9%86_%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c\"><\/span>\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0627\u06cc\u062f\u0647 \u067e\u0634\u062a \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0633\u0627\u062f\u0647 \u0627\u0633\u062a.  \u062f\u0631 \u0645\u0631\u0627\u062d\u0644 \u0627\u0648\u0644\u06cc\u0647 \u062a\u0645\u0631\u06cc\u0646 &#8211; \u0648\u0632\u0646\u0647 \u0647\u0627 \u0627\u0632 \u062d\u0627\u0644\u062a \u0627\u06cc\u062f\u0647 \u0622\u0644 \u062e\u0648\u062f \u0641\u0627\u0635\u0644\u0647 \u062f\u0627\u0631\u0646\u062f.  
\u0627\u06cc\u0646 \u0628\u0647 \u0645\u0639\u0646\u0627\u06cc \u0628\u0647 \u0631\u0648\u0632 \u0631\u0633\u0627\u0646\u06cc \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u062f\u0631 \u0633\u0631\u0627\u0633\u0631 \u0635\u0641\u062d\u0647 \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 &#8220;\u0627\u0635\u0644\u0627\u062d\u0627\u062a \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f&#8221; \u0628\u0631\u0627\u06cc \u0647\u0631 \u0648\u0632\u0646 \u062f\u0631 \u0646\u0638\u0631 \u06af\u0631\u0641\u062a\u0647 \u0634\u0648\u062f &#8211; \u06a9\u0647 \u062f\u0631 \u0622\u0646 \u0628\u0647 \u0631\u0648\u0632 \u0631\u0633\u0627\u0646\u06cc \u0634\u062f\u06cc\u062f \u0648\u0632\u0646\u0647 \u062f\u06cc\u06af\u0631 \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u0628\u0647 \u0631\u0648\u0632 \u0631\u0633\u0627\u0646\u06cc \u0648\u0632\u0646 \u062f\u06cc\u06af\u0631\u06cc \u0631\u0627 \u0646\u0641\u06cc \u06a9\u0646\u062f \u0648 \u0645\u0631\u0627\u062d\u0644 \u0627\u0648\u0644\u06cc\u0647 \u062a\u0645\u0631\u06cc\u0646 \u0631\u0627 \u0646\u0627\u067e\u0627\u06cc\u062f\u0627\u0631\u062a\u0631 \u06a9\u0646\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u062a\u063a\u06cc\u06cc\u0631\u0627\u062a \u0628\u0631\u0637\u0631\u0641 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f\u060c \u0627\u0645\u0627 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0628\u0627 \u062f\u0627\u0634\u062a\u0646 \u06cc\u06a9 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u06a9\u0648\u0686\u06a9 \u0628\u0631\u0627\u06cc \u0634\u0631\u0648\u0639\u060c \u0631\u0633\u06cc\u062f\u0646 \u0628\u0647 \u0648\u0636\u0639\u06cc\u062a \u067e\u0627\u06cc\u06cc\u0646\u200c\u062a\u0631 \u0627\u0632 \u0628\u0647\u06cc\u0646\u0647\u200c\u062a\u0631 \u0628\u0627 \u062b\u0628\u0627\u062a\u200c\u062a\u0631\u060c \u0648 \u0633\u067e\u0633 \u0627\u0639\u0645\u0627\u0644 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc 
\u0628\u0632\u0631\u06af\u200c\u062a\u0631 \u0627\u0632 \u0622\u0646\u0647\u0627 \u062c\u0644\u0648\u06af\u06cc\u0631\u06cc \u06a9\u0631\u062f.  \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0647\u200c\u062c\u0627\u06cc \u0636\u0631\u0628\u0647 \u0632\u062f\u0646 \u0628\u0627 \u0622\u0646\u0647\u0627\u060c \u0634\u0628\u06a9\u0647 \u0631\u0627 \u0628\u0647\u200c\u0646\u0648\u0639\u06cc \u0628\u0647\u200c\u0631\u0648\u0632\u0631\u0633\u0627\u0646\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0633\u062a!  \u0634\u0631\u0648\u0639 \u0628\u0627 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u06a9\u0645 (\u06cc\u0627 0) \u0648 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0628\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0634\u0631\u0648\u0639 (\u0686\u06cc\u0632\u06cc \u06a9\u0647 \u0628\u0647 \u0647\u0631 \u062d\u0627\u0644 \u0628\u0627 \u0622\u0646 \u0634\u0631\u0648\u0639 \u0645\u06cc \u06a9\u0646\u06cc\u062f).  
\u0627\u06cc\u0646 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0647\u0631 \u062a\u0627\u0628\u0639\u06cc \u0631\u0627 \u0648\u0627\u0642\u0639\u0627\u064b \u062f\u0646\u0628\u0627\u0644 \u06a9\u0646\u062f\u060c \u0627\u0645\u0627 \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u062e\u0637\u06cc \u0627\u0633\u062a.<\/p>\n<blockquote>\n<p>\u067e\u0633 \u0627\u0632 \u0631\u0633\u06cc\u062f\u0646 \u0628\u0647 \u0646\u0631\u062e \u0627\u0648\u0644\u06cc\u0647\u060c \u0628\u0631\u0646\u0627\u0645\u0647 \u0647\u0627\u06cc \u062f\u06cc\u06af\u0631\u06cc \u0645\u0627\u0646\u0646\u062f \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633\u060c \u06a9\u0627\u0647\u0634 \u062e\u0637\u06cc \u0648 \u063a\u06cc\u0631\u0647 \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0647\u0634 \u062a\u062f\u0631\u06cc\u062c\u06cc \u0646\u0631\u062e \u062a\u0627 \u067e\u0627\u06cc\u0627\u0646 \u062a\u0645\u0631\u06cc\u0646 \u0627\u0639\u0645\u0627\u0644 \u06a9\u0631\u062f.  
\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0645\u06cc\u0632\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0639\u0645\u0648\u0644\u0627\u064b \u0628\u062e\u0634\u06cc \u0627\u0632 \u06cc\u06a9 \u0628\u0631\u0646\u0627\u0645\u0647 \u062f\u0648 \u0632\u0645\u0627\u0646\u200c\u0628\u0646\u062f\u06cc \u0627\u0633\u062a\u060c \u062c\u0627\u06cc\u06cc \u06a9\u0647 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 LR \u0627\u0648\u0644\u06cc\u0646 \u0628\u0627\u0631 \u0627\u0633\u062a\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0628\u0631\u0646\u0627\u0645\u0647 \u062f\u06cc\u06af\u0631\u06cc \u067e\u0633 \u0627\u0632 \u0631\u0633\u06cc\u062f\u0646 \u0646\u0631\u062e \u0628\u0647 \u0646\u0642\u0637\u0647 \u0634\u0631\u0648\u0639 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u0634\u0648\u062f.<\/p>\n<\/blockquote>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u060c \u0645\u0627 \u06cc\u06a9 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u062f\u0631 Keras\/TensorFlow \u0628\u0647 \u0635\u0648\u0631\u062a \u06cc\u06a9 \u0627\u062c\u0631\u0627 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>keras.optimizers.schedules.LearningRateSchedule<\/code> \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 \u0648 <code>keras.callbacks.Callback<\/code> \u067e\u0627\u0633\u062e \u0628\u0647 \u062a\u0645\u0627\u0633  \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0632 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u06cc\u0627\u0628\u062f <code>0<\/code> \u0628\u0647 <code>target_lr<\/code> \u0648 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633 \u0631\u0627 \u0627\u0639\u0645\u0627\u0644 \u06a9\u0646\u06cc\u062f\u060c \u0632\u06cc\u0631\u0627 \u0627\u06cc\u0646 \u06cc\u06a9 \u0628\u0631\u0646\u0627\u0645\u0647 \u062b\u0627\u0646\u0648\u06cc\u0647 \u0628\u0633\u06cc\u0627\u0631 \u0631\u0627\u06cc\u062c 
\u0627\u0633\u062a.  \u0637\u0628\u0642 \u0645\u0639\u0645\u0648\u0644\u060c Keras \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u0631\u0627\u0647\u200c\u062d\u0644\u200c\u0647\u0627\u06cc \u0627\u0646\u0639\u0637\u0627\u0641\u200c\u067e\u0630\u06cc\u0631 \u0631\u0627 \u0628\u0647 \u0631\u0648\u0634\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u0648 \u0627\u0631\u0633\u0627\u0644 \u0622\u0646\u200c\u0647\u0627 \u0628\u0627 \u0634\u0628\u06a9\u0647 \u0634\u0645\u0627 \u0633\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f.<\/p>\n<div class=\"alert alert-note\">\n<div class=\"flex\">\n<div class=\"flex-shrink-0 mr-3\"><\/div>\n<div class=\"w-full\">\n<p><strong>\u062a\u0648\u062c\u0647 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f:<\/strong> \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0639\u0645\u0648\u0645\u06cc \u0648 \u0627\u0644\u0647\u0627\u0645 \u06af\u0631\u0641\u062a\u0647 \u0634\u062f\u0647 \u0627\u0633\u062a <a target=\"_blank\" rel=\"nofollow noopener noreferrer\" href=\"https:\/\/github.com\/Tony607\/Keras_Bag_of_Tricks\">\u0627\u062c\u0631\u0627\u06cc \u06a9\u0631\u0627\u0633 \u062a\u0648\u0646\u06cc<\/a> \u0627\u0632 \u062a\u0631\u0641\u0646\u062f\u0647\u0627\u06cc \u0630\u06a9\u0631 \u0634\u062f\u0647 \u062f\u0631 &#8220;<em>\u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0627\u0632 \u062a\u0631\u0641\u0646\u062f\u0647\u0627 \u0628\u0631\u0627\u06cc \u0637\u0628\u0642\u0647 \u0628\u0646\u062f\u06cc \u062a\u0635\u0648\u06cc\u0631 \u0628\u0627 \u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u0639\u0635\u0628\u06cc \u06a9\u0627\u0646\u0648\u0644\u0648\u0634\u0646&#8221;<\/em>.<\/p>\n<\/p><\/div><\/div><\/div>\n<h2 id=\"learningratewithkerascallbacks\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%a8%d8%a7_keras_callbacks\"><\/span>\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 Keras Callbacks<span 
class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0633\u0627\u062f\u0647\u200c\u062a\u0631\u06cc\u0646 \u0631\u0627\u0647 \u0628\u0631\u0627\u06cc \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u0647\u0631 \u0632\u0645\u0627\u0646\u200c\u0628\u0646\u062f\u06cc \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc\u060c \u0627\u06cc\u062c\u0627\u062f \u062a\u0627\u0628\u0639\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0627\u0632 \u0622\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f <code>lr<\/code> \u067e\u0627\u0631\u0627\u0645\u062a\u0631 (<code>float32<\/code>)\u060c \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u062a\u063a\u06cc\u06cc\u0631 \u0634\u06a9\u0644 \u0645\u06cc \u062f\u0647\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.  \u0633\u067e\u0633 \u0627\u06cc\u0646 \u062a\u0627\u0628\u0639 \u0627\u0631\u0633\u0627\u0644 \u0645\u06cc \u0634\u0648\u062f \u0631\u0648\u06cc \u0628\u0647 <code>LearningRateScheduler<\/code> \u062a\u0645\u0627\u0633 \u0628\u0631\u06af\u0634\u062a\u06cc\u060c \u06a9\u0647 \u062a\u0627\u0628\u0639 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0639\u0645\u0627\u0644 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u062d\u0627\u0644 \u062d\u0627\u0636\u0631 <code>tf.keras.callbacks.LearningRateScheduler()<\/code> \u0639\u062f\u062f \u062f\u0648\u0631\u0647 \u0631\u0627 \u0628\u0647 \u062a\u0627\u0628\u0639\u06cc \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0645\u062d\u0627\u0633\u0628\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u062f\u060c \u0645\u0646\u062a\u0642\u0644 \u0645\u06cc \u06a9\u0646\u062f\u060c \u06a9\u0647 \u0628\u0633\u06cc\u0627\u0631 \u062f\u0631\u0634\u062a \u0627\u0633\u062a.  
LR Warmup \u0628\u0627\u06cc\u062f \u0627\u0646\u062c\u0627\u0645 \u0634\u0648\u062f \u0631\u0648\u06cc \u0647\u0631 \u06cc\u06a9 <em>\u06af\u0627\u0645<\/em> (\u062f\u0633\u062a\u0647 \u0627\u06cc)\u060c \u0646\u0647 \u062f\u0648\u0631\u0647\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0645\u0627 \u0628\u0627\u06cc\u062f a \u0631\u0627 \u0627\u0633\u062a\u062e\u0631\u0627\u062c \u06a9\u0646\u06cc\u0645 <code>global_step<\/code> (\u062f\u0631 \u0647\u0645\u0647 \u0627\u062f\u0648\u0627\u0631) \u0628\u0631\u0627\u06cc \u0645\u062d\u0627\u0633\u0628\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0647 \u062c\u0627\u06cc \u0622\u0646\u060c \u0648 \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 <code>Callback<\/code> \u06a9\u0644\u0627\u0633 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0641\u0631\u0627\u062e\u0648\u0627\u0646 \u0633\u0641\u0627\u0631\u0634\u06cc \u0628\u0647 \u062c\u0627\u06cc \u0627\u0631\u0633\u0627\u0644 \u062a\u0627\u0628\u0639\u060c \u0632\u06cc\u0631\u0627 \u0628\u0627\u06cc\u062f \u062f\u0631 \u0622\u0631\u06af\u0648\u0645\u0627\u0646 \u0647\u0627 \u0627\u0631\u0633\u0627\u0644 \u06a9\u0646\u06cc\u0645 \u0631\u0648\u06cc \u0647\u0631 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc\u060c \u06a9\u0647 \u062f\u0631 \u0647\u0646\u06af\u0627\u0645 \u0627\u0646\u062a\u0642\u0627\u0644 \u062a\u0627\u0628\u0639 \u063a\u06cc\u0631\u0645\u0645\u06a9\u0646 \u0627\u0633\u062a:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">func<\/span>():<\/span>\n    <span class=\"hljs-keyword\">return<\/span> ...\n    \nkeras.callbacks.LearningRateScheduler(func)\n<\/code><\/pre>\n<p>\u0627\u06cc\u0646 \u0631\u0648\u06cc\u06a9\u0631\u062f \u0632\u0645\u0627\u0646\u06cc \u0645\u0637\u0644\u0648\u0628 \u0627\u0633\u062a \u06a9\u0647 \u0633\u0637\u062d \u0628\u0627\u0644\u0627\u06cc\u06cc \u0627\u0632 
\u0633\u0641\u0627\u0631\u0634\u06cc\u200c\u0633\u0627\u0632\u06cc \u0631\u0627 \u0646\u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u0648 \u0646\u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u0628\u0627 \u0631\u0648\u0634\u06cc \u06a9\u0647 Keras \u0631\u0641\u062a\u0627\u0631 \u0645\u06cc\u200c\u06a9\u0646\u062f \u062a\u062f\u0627\u062e\u0644 \u062f\u0627\u0634\u062a\u0647 \u0628\u0627\u0634\u06cc\u062f. <code>lr<\/code>\u0648 \u0628\u0647 \u062e\u0635\u0648\u0635 \u0627\u06af\u0631 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u0627\u0632 \u062a\u0645\u0627\u0633\u200c\u0647\u0627\u06cc \u0628\u0631\u06af\u0634\u062a\u06cc \u0645\u0627\u0646\u0646\u062f <code>ReduceLROnPlateau()<\/code> \u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 \u0641\u0642\u0637 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0628\u0627 \u06cc\u06a9 float-based \u06a9\u0627\u0631 \u06a9\u0646\u062f <code>lr<\/code>.  \u0628\u06cc\u0627\u06cc\u06cc\u062f \u06cc\u06a9 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06cc\u06a9 \u0641\u0631\u0627\u062e\u0648\u0627\u0646 Keras\u060c \u0628\u0627 \u06cc\u06a9 \u062a\u0627\u0628\u0639 \u0631\u0627\u062d\u062a \u0634\u0631\u0648\u0639 \u06a9\u0646\u06cc\u0645:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">lr_warmup_cosine_decay<\/span>(<span class=\"hljs-params\">global_step,\n                           warmup_steps,\n                           hold = <span class=\"hljs-number\">0<\/span>,\n                           total_steps=<span class=\"hljs-number\">0<\/span>,\n                           start_lr=<span class=\"hljs-number\">0.0<\/span>,\n                           target_lr=<span class=\"hljs-number\">1e-3<\/span><\/span>):<\/span>\n    \n    learning_rate = <span 
class=\"hljs-number\">0.5<\/span> * target_lr * (<span class=\"hljs-number\">1<\/span> + np.cos(np.pi * (global_step - warmup_steps - hold) \/ <span class=\"hljs-built_in\">float<\/span>(total_steps - warmup_steps - hold)))\n\n    \n    warmup_lr = target_lr * (global_step \/ warmup_steps)\n\n    \n    \n    <span class=\"hljs-keyword\">if<\/span> hold &gt; <span class=\"hljs-number\">0<\/span>:\n        learning_rate = np.where(global_step &gt; warmup_steps + hold,\n                                 learning_rate, target_lr)\n    \n    learning_rate = np.where(global_step &lt; warmup_steps, warmup_lr, learning_rate)\n    <span class=\"hljs-keyword\">return<\/span> learning_rate\n<\/code><\/pre>\n<p>\u062f\u0631 \u0647\u0631 \u0645\u0631\u062d\u0644\u0647\u060c \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0648 \u0633\u0631\u0639\u062a \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 (\u0647\u0631 \u062f\u0648 \u0639\u0646\u0635\u0631 \u0628\u0631\u0646\u0627\u0645\u0647) \u0631\u0627 \u0628\u0627 \u062a\u0648\u062c\u0647 \u0628\u0647 <code>start_lr<\/code> \u0648 <code>target_lr<\/code>. 
<code>start_lr<\/code> \u0645\u0639\u0645\u0648\u0644\u0627 \u062f\u0631 \u0634\u0631\u0648\u0639 \u062e\u0648\u0627\u0647\u062f \u0634\u062f <code>0.0<\/code>\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 <code>target_lr<\/code> \u0628\u0633\u062a\u06af\u06cc \u062f\u0627\u0631\u062f \u0631\u0648\u06cc \u0634\u0628\u06a9\u0647 \u0648 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632 \u0634\u0645\u0627 &#8211; <code>1e-3<\/code> \u0645\u0645\u06a9\u0646 \u0627\u0633\u062a \u067e\u06cc\u0634\u200c\u0641\u0631\u0636 \u062e\u0648\u0628\u06cc \u0646\u0628\u0627\u0634\u062f\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0647\u0646\u06af\u0627\u0645 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc \u0631\u0648\u0634\u060c \u0645\u0637\u0645\u0626\u0646 \u0634\u0648\u06cc\u062f \u06a9\u0647 \u0647\u062f\u0641 \u062e\u0648\u062f \u0631\u0627 \u0628\u0631\u0627\u06cc \u0634\u0631\u0648\u0639 LR \u062a\u0646\u0638\u06cc\u0645 \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0627\u06af\u0631 <code>global_step<\/code> \u062f\u0631 \u0622\u0645\u0648\u0632\u0634 \u0628\u0627\u0644\u0627\u062a\u0631 \u0627\u0632 <code>warmup_steps<\/code> \u0645\u0627 \u062a\u0646\u0638\u06cc\u0645 \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645 &#8211; \u0627\u0632 \u0628\u0631\u0646\u0627\u0645\u0647 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633 LR \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u0627\u06af\u0631 \u0646\u0647\u060c \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u0627 \u0647\u0646\u0648\u0632 \u062f\u0631 \u062d\u0627\u0644 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0647\u0633\u062a\u06cc\u0645\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0627\u0632 \u06af\u0631\u0645 \u06a9\u0646\u0646\u062f\u0647 LR \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.  
\u0627\u06af\u0631 <code>hold<\/code> \u0622\u0631\u06af\u0648\u0645\u0627\u0646 \u062a\u0646\u0638\u06cc\u0645 \u0634\u062f\u0647 \u0627\u0633\u062a\u060c \u0645\u0627 \u0622\u0646 \u0631\u0627 \u0646\u06af\u0647 \u0645\u06cc \u062f\u0627\u0631\u06cc\u0645 <code>target_lr<\/code> \u0628\u0631\u0627\u06cc \u0622\u0646 \u062a\u0639\u062f\u0627\u062f \u0642\u062f\u0645 \u0628\u0639\u062f \u0627\u0632 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0648 \u0642\u0628\u0644 \u0627\u0632 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633. <code>np.where()<\/code> \u06cc\u06a9 \u0646\u062d\u0648 \u0639\u0627\u0644\u06cc \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc \u062f\u0647\u062f:<\/p>\n<pre><code class=\"hljs\">np.where(condition, value_if_true, value_if_false)\n<\/code><\/pre>\n<p>\u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0639\u0645\u0644\u06a9\u0631\u062f \u0631\u0627 \u0628\u0627 \u0645\u0648\u0627\u0631\u062f \u0632\u06cc\u0631 \u062a\u062c\u0633\u0645 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">steps = np.arange(<span class=\"hljs-number\">0<\/span>, <span class=\"hljs-number\">1000<\/span>, <span class=\"hljs-number\">1<\/span>)\nlrs = []\n\n<span class=\"hljs-keyword\">for<\/span> step <span class=\"hljs-keyword\">in<\/span> steps:\n  lrs.append(lr_warmup_cosine_decay(step, total_steps=<span class=\"hljs-built_in\">len<\/span>(steps), warmup_steps=<span class=\"hljs-number\">100<\/span>, hold=<span class=\"hljs-number\">10<\/span>))\nplt.plot(lrs)\n<\/code><\/pre>\n<p><img decoding=\"async\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/learning-rate-warmup-with-cosine-decay-in-kerastensorflow-1.png\" alt=\"\" title=\"\"><\/p>\n<p>\u0627\u06a9\u0646\u0648\u0646\u060c \u0645\u0627 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u0645 \u0627\u0632 \u0627\u06cc\u0646 \u062a\u0627\u0628\u0639 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0628\u062e\u0634\u06cc 
\u0627\u0632 \u06cc\u06a9 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u0645 \u0648 \u0645\u0631\u062d\u0644\u0647 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632 \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0642\u0633\u0645\u062a \u0639\u0628\u0648\u0631 \u062f\u0647\u06cc\u0645 <code>global_step<\/code> \u0628\u0647 \u062c\u0627\u06cc \u06cc\u06a9 \u0639\u0646\u0635\u0631 \u0627\u0632 \u06cc\u06a9 \u0622\u0631\u0627\u06cc\u0647 \u062f\u0644\u062e\u0648\u0627\u0647 &#8211; \u06cc\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0645\u062d\u0627\u0633\u0628\u0627\u062a \u0631\u0627 \u062f\u0631 \u06a9\u0644\u0627\u0633 \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f.  \u0628\u06cc\u0627\u06cc\u06cc\u062f \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 <code>Callback<\/code> \u06a9\u0644\u0627\u0633:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-keyword\">from<\/span> keras <span class=\"hljs-keyword\">import<\/span> backend <span class=\"hljs-keyword\">as<\/span> K\n\n<span class=\"hljs-class\"><span class=\"hljs-keyword\">class<\/span> <span class=\"hljs-title\">WarmupCosineDecay<\/span>(<span class=\"hljs-params\">keras.callbacks.Callback<\/span>):<\/span>\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__init__<\/span>(<span class=\"hljs-params\">self, total_steps=<span class=\"hljs-number\">0<\/span>, warmup_steps=<span class=\"hljs-number\">0<\/span>, start_lr=<span class=\"hljs-number\">0.0<\/span>, target_lr=<span class=\"hljs-number\">1e-3<\/span>, hold=<span class=\"hljs-number\">0<\/span><\/span>):<\/span>\n\n        <span class=\"hljs-built_in\">super<\/span>(WarmupCosineDecay, self).__init__()\n        self.start_lr = start_lr\n        self.hold = hold\n        self.total_steps = total_steps\n        self.global_step = <span class=\"hljs-number\">0<\/span>\n        self.target_lr = target_lr\n        
self.warmup_steps = warmup_steps\n        self.lrs = []\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">on_batch_end<\/span>(<span class=\"hljs-params\">self, batch, logs=<span class=\"hljs-literal\">None<\/span><\/span>):<\/span>\n        self.global_step = self.global_step + <span class=\"hljs-number\">1<\/span>\n        lr = self.model.optimizer.lr.numpy()\n        self.lrs.append(lr)\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">on_batch_begin<\/span>(<span class=\"hljs-params\">self, batch, logs=<span class=\"hljs-literal\">None<\/span><\/span>):<\/span>\n        lr = lr_warmup_cosine_decay(global_step=self.global_step,\n                                    total_steps=self.total_steps,\n                                    warmup_steps=self.warmup_steps,\n                                    start_lr=self.start_lr,\n                                    target_lr=self.target_lr,\n                                    hold=self.hold)\n        K.set_value(self.model.optimizer.lr, lr)\n<\/code><\/pre>\n<p>\u0627\u0628\u062a\u062f\u0627 \u0633\u0627\u0632\u0646\u062f\u0647 \u06a9\u0644\u0627\u0633 \u0631\u0627 \u062a\u0639\u0631\u06cc\u0641 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0641\u06cc\u0644\u062f\u0647\u0627\u06cc \u0622\u0646 \u0631\u0627 \u067e\u06cc\u06af\u06cc\u0631\u06cc \u0645\u06cc \u06a9\u0646\u06cc\u0645.  
\u062f\u0631 \u0647\u0631 \u062f\u0633\u062a\u0647 \u0627\u06cc \u06a9\u0647 \u0628\u0647 \u067e\u0627\u06cc\u0627\u0646 \u0645\u06cc \u0631\u0633\u062f\u060c \u06af\u0627\u0645 \u062c\u0647\u0627\u0646\u06cc \u0631\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u062f\u0647\u06cc\u0645\u060c LR \u0641\u0639\u0644\u06cc \u0631\u0627 \u06cc\u0627\u062f\u062f\u0627\u0634\u062a \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0647 \u0644\u06cc\u0633\u062a LR \u0647\u0627 \u062a\u0627 \u06a9\u0646\u0648\u0646 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u062f\u0631 \u0627\u0628\u062a\u062f\u0627\u06cc \u0647\u0631 \u062f\u0633\u062a\u0647 &#8211; \u0645\u0627 LR \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0622\u0646 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>lr_warmup_cosine_decay()<\/code> \u0639\u0645\u0644\u06a9\u0631\u062f \u0648 LR \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 LR \u0641\u0639\u0644\u06cc \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632 \u062a\u0646\u0638\u06cc\u0645 \u06a9\u0646\u06cc\u062f.  \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0628\u0627 backend \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u0634\u0648\u062f <code>set_value()<\/code>.<\/p>\n<p>\u0628\u0627 \u0627\u0646\u062c\u0627\u0645 \u0627\u06cc\u0646 \u06a9\u0627\u0631 &#8211; \u06a9\u0627\u0641\u06cc\u0633\u062a \u06a9\u0644 \u0645\u0631\u0627\u062d\u0644 (\u0637\u0648\u0644\/\u0627\u0646\u062f\u0627\u0632\u0647_\u062f\u0633\u062a\u0647*\u062f\u0648\u0631\u0627\u0646) \u0631\u0627 \u0645\u062d\u0627\u0633\u0628\u0647 \u06a9\u0646\u06cc\u062f \u0648 \u0628\u062e\u0634\u06cc \u0627\u0632 \u0622\u0646 \u0639\u062f\u062f \u0631\u0627 \u0628\u0631\u0627\u06cc \u062e\u0648\u062f \u0627\u0646\u062a\u062e\u0627\u0628 \u06a9\u0646\u06cc\u062f. 
<code>warmup_steps<\/code>:<\/p>\n<pre><code class=\"hljs\">\ntotal_steps = <span class=\"hljs-built_in\">len<\/span>(train_set)*config[<span class=\"hljs-string\">'EPOCHS'<\/span>]\n\n\n\nwarmup_steps = <span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.05<\/span>*total_steps)\n\ncallback = WarmupCosineDecay(total_steps=total_steps, \n                             warmup_steps=warmup_steps,\n                             hold=<span class=\"hljs-built_in\">int<\/span>(warmup_steps\/<span class=\"hljs-number\">2<\/span>), \n                             start_lr=<span class=\"hljs-number\">0.0<\/span>, \n                             target_lr=<span class=\"hljs-number\">1e-3<\/span>)\n<\/code><\/pre>\n<p>\u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u0645\u062f\u0644 \u062e\u0648\u062f \u0631\u0627 \u0628\u0633\u0627\u0632\u06cc\u062f \u0648 \u0641\u0631\u0627\u062e\u0648\u0627\u0646 (callback) \u0631\u0627 \u062f\u0631 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u06cc <code>fit()<\/code> \u0627\u0631\u0627\u0626\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">model = keras.applications.EfficientNetV2B0(weights=<span class=\"hljs-literal\">None<\/span>, \n                                            classes=n_classes, \n                                            input_shape=(<span class=\"hljs-number\">224<\/span>, <span class=\"hljs-number\">224<\/span>, <span class=\"hljs-number\">3<\/span>))\n  \nmodel.<span class=\"hljs-built_in\">compile<\/span>(loss=<span class=\"hljs-string\">\"sparse_categorical_crossentropy\"<\/span>,\n                  optimizer=<span class=\"hljs-string\">'adam'<\/span>,\n                  jit_compile=<span class=\"hljs-literal\">True<\/span>,\n                  metrics=[<span class=\"hljs-string\">'accuracy'<\/span>])\n<\/code><\/pre>\n<p>\u062f\u0631 \u067e\u0627\u06cc\u0627\u0646 \u0622\u0645\u0648\u0632\u0634\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f LR \u0647\u0627\u06cc 
\u062a\u063a\u06cc\u06cc\u0631 \u06cc\u0627\u0641\u062a\u0647 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642:<\/p>\n<pre><code class=\"hljs\">lrs = callback.lrs \nplt.plot(lrs)\n<\/code><\/pre>\n<p><img decoding=\"async\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/learning-rate-warmup-with-cosine-decay-in-kerastensorflow-2.png\" alt=\"\" title=\"\"><\/p>\n<p>\u0627\u06af\u0631 \u062a\u0627\u0631\u06cc\u062e\u0686\u0647 \u06cc\u06a9 \u0645\u062f\u0644 \u0622\u0645\u0648\u0632\u0634 \u062f\u06cc\u062f\u0647 \u0628\u0627 \u0648 \u0628\u062f\u0648\u0646 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 LR \u0631\u0627 \u062a\u0631\u0633\u06cc\u0645 \u06a9\u0646\u06cc\u062f &#8211; \u062a\u0641\u0627\u0648\u062a \u0645\u0634\u062e\u0635\u06cc \u062f\u0631 \u067e\u0627\u06cc\u062f\u0627\u0631\u06cc \u062a\u0645\u0631\u06cc\u0646 \u0645\u0634\u0627\u0647\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u062f \u06a9\u0631\u062f:<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/rasanegar.com\/blog\/wp-content\/uploads\/2024\/01\/learning-rate-warmup-with-cosine-decay-in-kerastensorflow-3.png\" alt=\"\" title=\"\"><\/p>\n<h2 id=\"learningratewithlearningrateschedulesubclass\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%b1%d8%ae_%db%8c%d8%a7%d8%af%da%af%db%8c%d8%b1%db%8c_%d8%a8%d8%a7_%d8%b2%db%8c%d8%b1_%da%a9%d9%84%d8%a7%d8%b3_learningrateschedule\"><\/span>\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 LearningRateSchedule<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u06cc\u06a9 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u062a\u0645\u0627\u0633\u060c \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0627\u0633\u062a <code>LearningRateSchedule<\/code> \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633\u060c \u06a9\u0647 LR \u0631\u0627 \u062f\u0633\u062a\u06a9\u0627\u0631\u06cc \u0646\u0645\u06cc \u06a9\u0646\u062f 
&#8211; \u0622\u0646 \u0631\u0627 \u062c\u0627\u06cc\u06af\u0632\u06cc\u0646 \u0645\u06cc \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u0631\u0648\u06cc\u06a9\u0631\u062f \u0628\u0647 \u0634\u0645\u0627 \u0627\u0645\u06a9\u0627\u0646 \u0645\u06cc\u200c\u062f\u0647\u062f \u06a9\u0645\u06cc \u0628\u06cc\u0634\u062a\u0631 \u0628\u0647 \u067e\u0634\u062a\u06cc\u0628\u0627\u0646 Keras\/TensorFlow \u0648\u0627\u0631\u062f \u0634\u0648\u06cc\u062f\u060c \u0627\u0645\u0627 \u0648\u0642\u062a\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u0646\u0645\u06cc\u200c\u062a\u0648\u0627\u0646 \u0622\u0646 \u0631\u0627 \u0628\u0627 \u0633\u0627\u06cc\u0631 \u062a\u0645\u0627\u0633\u200c\u0647\u0627\u06cc \u0645\u0631\u062a\u0628\u0637 \u0628\u0627 LR \u062a\u0631\u06a9\u06cc\u0628 \u06a9\u0631\u062f\u060c \u0645\u0627\u0646\u0646\u062f <code>ReduceLROnPlateau()<\/code>\u060c \u06a9\u0647 \u0628\u0627 LR \u0647\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0627\u0639\u062f\u0627\u062f \u0645\u0645\u06cc\u0632 \u0634\u0646\u0627\u0648\u0631 \u0633\u0631\u0648\u06a9\u0627\u0631 \u062f\u0627\u0631\u062f.<\/p>\n<p>\u0639\u0644\u0627\u0648\u0647 \u0628\u0631 \u0627\u06cc\u0646\u060c \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06a9\u0644\u0627\u0633 \u0641\u0631\u0639\u06cc \u0627\u0632 \u0634\u0645\u0627 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u062f \u06a9\u0647 \u0622\u0646 \u0631\u0627 \u0633\u0631\u06cc\u0627\u0644\u200c\u0633\u0627\u0632\u06cc \u06a9\u0646\u06cc\u062f (overload <code>get_config()<\/code>) \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0628\u062e\u0634\u06cc \u0627\u0632 \u0645\u062f\u0644\u060c \u0627\u06af\u0631 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u0648\u0632\u0646 \u0645\u062f\u0644 \u0631\u0627 \u0630\u062e\u06cc\u0631\u0647 \u06a9\u0646\u06cc\u062f.  
\u0646\u06a9\u062a\u0647 \u062f\u06cc\u06af\u0631\u06cc \u06a9\u0647 \u0628\u0627\u06cc\u062f \u0628\u0647 \u0622\u0646 \u062a\u0648\u062c\u0647 \u06a9\u0631\u062f \u0627\u06cc\u0646 \u0627\u0633\u062a \u06a9\u0647 \u06a9\u0644\u0627\u0633 \u0627\u0646\u062a\u0638\u0627\u0631 \u062f\u0627\u0631\u062f \u0628\u0647 \u0637\u0648\u0631 \u0627\u0646\u062d\u0635\u0627\u0631\u06cc \u0628\u0627 \u0622\u0646 \u06a9\u0627\u0631 \u06a9\u0646\u062f <code>tf.Tensor<\/code>\u0633  \u062e\u0648\u0634\u0628\u062e\u062a\u0627\u0646\u0647\u060c \u062a\u0646\u0647\u0627 \u062a\u0641\u0627\u0648\u062a \u062f\u0631 \u0631\u0648\u0634 \u06a9\u0627\u0631 \u0645\u0627 \u062a\u0645\u0627\u0633 \u062e\u0648\u0627\u0647\u062f \u0628\u0648\u062f <code>tf.func()<\/code> \u0628\u062c\u0627\u06cc <code>np.func()<\/code> \u0627\u0632 \u0622\u0646\u062c\u0627\u06cc\u06cc \u06a9\u0647 API \u0647\u0627\u06cc TensorFlow \u0648 NumPy \u0628\u0647 \u0637\u0631\u0632 \u0634\u06af\u0641\u062a \u0627\u0646\u06af\u06cc\u0632\u06cc \u0645\u0634\u0627\u0628\u0647 \u0648 \u0633\u0627\u0632\u06af\u0627\u0631 \u0647\u0633\u062a\u0646\u062f.<\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0631\u0627\u062d\u062a\u06cc \u0631\u0627 \u0628\u0627\u0632\u0646\u0648\u06cc\u0633\u06cc \u06a9\u0646\u06cc\u0645 <code>lr_warmup_cosine_decay()<\/code> \u062a\u0627\u0628\u0639\u06cc \u0628\u0631\u0627\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0639\u0645\u0644\u06cc\u0627\u062a TensorFlow \u0628\u0647 \u062c\u0627\u06cc \u0622\u0646:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">lr_warmup_cosine_decay<\/span>(<span class=\"hljs-params\">global_step,\n                           warmup_steps,\n                           hold = <span class=\"hljs-number\">0<\/span>,\n                           total_steps=<span class=\"hljs-number\">0<\/span>,\n                           start_lr=<span class=\"hljs-number\">0.0<\/span>,\n   
                        target_lr=<span class=\"hljs-number\">1e-3<\/span><\/span>):<\/span>\n    \n    \n    learning_rate = <span class=\"hljs-number\">0.5<\/span> * target_lr * (<span class=\"hljs-number\">1<\/span> + tf.cos(tf.constant(np.pi) * (global_step - warmup_steps - hold) \/ <span class=\"hljs-built_in\">float<\/span>(total_steps - warmup_steps - hold)))\n\n    \n    warmup_lr = target_lr * (global_step \/ warmup_steps)\n\n    \n    \n    <span class=\"hljs-keyword\">if<\/span> hold &gt; <span class=\"hljs-number\">0<\/span>:\n        learning_rate = tf.where(global_step &gt; warmup_steps + hold,\n                                 learning_rate, target_lr)\n    \n    learning_rate = tf.where(global_step &lt; warmup_steps, warmup_lr, learning_rate)\n    <span class=\"hljs-keyword\">return<\/span> learning_rate\n<\/code><\/pre>\n<p>\u0628\u0627 \u062a\u0627\u0628\u0639 convenience\u060c \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u0645 \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633\u200c\u0628\u0646\u062f\u06cc \u06a9\u0646\u06cc\u0645 <code>LearningRateSchedule<\/code> \u06a9\u0644\u0627\u0633  \u0631\u0648\u06cc \u0647\u0631 \u06a9\u062f\u0627\u0645 <code>__call__()<\/code> (\u062f\u0633\u062a\u0647 \u0627\u06cc)\u060c LR \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u062a\u0627\u0628\u0639 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u06cc\u0645.  
\u0634\u0645\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0637\u0628\u06cc\u0639\u06cc \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0645\u062d\u0627\u0633\u0628\u0647 \u0631\u0627 \u062f\u0631 \u06a9\u0644\u0627\u0633 \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633 \u0646\u06cc\u0632 \u0628\u0633\u062a\u0647 \u0628\u0646\u062f\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0646\u062d\u0648 \u062a\u0645\u06cc\u0632\u062a\u0631 \u0627\u0632 <code>Callback<\/code> \u0632\u06cc\u0631 \u06a9\u0644\u0627\u0633\u060c \u062f\u0631 \u062f\u0631\u062c\u0647 \u0627\u0648\u0644 \u0628\u0647 \u0627\u06cc\u0646 \u062f\u0644\u06cc\u0644 \u06a9\u0647 \u0645\u0627 \u0628\u0647 \u0622\u0646 \u062f\u0633\u062a\u0631\u0633\u06cc \u062f\u0627\u0631\u06cc\u0645 <code>step<\/code> \u0645\u06cc\u062f\u0627\u0646\u060c \u0628\u0647 \u062c\u0627\u06cc \u067e\u06cc\u06af\u06cc\u0631\u06cc \u0622\u0646 \u0631\u0648\u06cc \u062e\u0648\u062f\u0645\u0627\u0646 \u0627\u0633\u062a\u060c \u0627\u0645\u0627 \u06a9\u0627\u0631 \u0628\u0627 \u0648\u06cc\u0698\u06af\u06cc \u0647\u0627\u06cc \u06a9\u0644\u0627\u0633 \u0631\u0627 \u0646\u06cc\u0632 \u062a\u0627 \u062d\u062f\u0648\u062f\u06cc \u0633\u062e\u062a \u0645\u06cc \u06a9\u0646\u062f &#8211; \u0628\u0647 \u0648\u06cc\u0698\u0647\u060c \u0627\u0633\u062a\u062e\u0631\u0627\u062c <code>lr<\/code> \u0627\u0632 <code>tf.Tensor()<\/code> \u0628\u0647 \u0647\u0631 \u0646\u0648\u0639 \u062f\u06cc\u06af\u0631\u06cc \u0628\u0631\u0627\u06cc \u067e\u06cc\u06af\u06cc\u0631\u06cc \u062f\u0631 \u06cc\u06a9 \u0644\u06cc\u0633\u062a.  
\u0627\u06cc\u0646 \u0631\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0628\u0627 \u0627\u062c\u0631\u0627\u06cc \u062f\u0631 \u062d\u0627\u0644\u062a \u0645\u0634\u062a\u0627\u0642 \u0627\u0632 \u0646\u0638\u0631 \u0641\u0646\u06cc \u062f\u0648\u0631 \u0632\u062f\u060c \u0627\u0645\u0627 \u0628\u0631\u0627\u06cc \u067e\u06cc\u06af\u06cc\u0631\u06cc LR \u0628\u0631\u0627\u06cc \u0627\u0647\u062f\u0627\u0641 \u0627\u0634\u06a9\u0627\u0644 \u0632\u062f\u0627\u06cc\u06cc \u0628\u0627\u0639\u062b \u0646\u0627\u0631\u0627\u062d\u062a\u06cc \u0645\u06cc \u0634\u0648\u062f \u0648 \u0628\u0647\u062a\u0631 \u0627\u0633\u062a \u0627\u0632 \u0622\u0646 \u0627\u062c\u062a\u0646\u0627\u0628 \u0634\u0648\u062f:<\/p>\n<pre><code class=\"hljs\"><span class=\"hljs-class\"><span class=\"hljs-keyword\">class<\/span> <span class=\"hljs-title\">WarmUpCosineDecay<\/span>(<span class=\"hljs-params\">keras.optimizers.schedules.LearningRateSchedule<\/span>):<\/span>\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__init__<\/span>(<span class=\"hljs-params\">self, start_lr, target_lr, warmup_steps, total_steps, hold<\/span>):<\/span>\n        <span class=\"hljs-built_in\">super<\/span>().__init__()\n        self.start_lr = start_lr\n        self.target_lr = target_lr\n        self.warmup_steps = warmup_steps\n        self.total_steps = total_steps\n        self.hold = hold\n\n    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">__call__<\/span>(<span class=\"hljs-params\">self, step<\/span>):<\/span>\n        lr = lr_warmup_cosine_decay(global_step=step,\n                                    total_steps=self.total_steps,\n                                    warmup_steps=self.warmup_steps,\n                                    start_lr=self.start_lr,\n                                    target_lr=self.target_lr,\n                                    hold=self.hold)\n\n        <span 
class=\"hljs-keyword\">return<\/span> tf.where(\n            step &gt; self.total_steps, <span class=\"hljs-number\">0.0<\/span>, lr, name=<span class=\"hljs-string\">\"learning_rate\"<\/span>\n        )\n<\/code><\/pre>\n<p>\u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627 \u06cc\u06a9\u0633\u0627\u0646 \u0647\u0633\u062a\u0646\u062f \u0648 \u0645\u06cc \u062a\u0648\u0627\u0646 \u0622\u0646\u0647\u0627 \u0631\u0627 \u0628\u0647 \u0647\u0645\u0627\u0646 \u0631\u0648\u0634 \u0642\u0628\u0644\u06cc \u0645\u062d\u0627\u0633\u0628\u0647 \u06a9\u0631\u062f:<\/p>\n<pre><code class=\"hljs\">\ntotal_steps = <span class=\"hljs-built_in\">len<\/span>(train_set)*config[<span class=\"hljs-string\">'EPOCHS'<\/span>]\n\n\n\nwarmup_steps = <span class=\"hljs-built_in\">int<\/span>(<span class=\"hljs-number\">0.05<\/span>*total_steps)\n\nschedule = WarmUpCosineDecay(start_lr=<span class=\"hljs-number\">0.0<\/span>, target_lr=<span class=\"hljs-number\">1e-3<\/span>, warmup_steps=warmup_steps, total_steps=total_steps, hold=warmup_steps)\n<\/code><\/pre>\n<p>\u0648 \u062e\u0637 \u0644\u0648\u0644\u0647 \u0622\u0645\u0648\u0632\u0634\u06cc \u0641\u0642\u0637 \u0627\u0632 \u0627\u06cc\u0646 \u062c\u0647\u062a \u0645\u062a\u0641\u0627\u0648\u062a \u0627\u0633\u062a \u06a9\u0647 LR \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632 \u0631\u0627 \u0631\u0648\u06cc \u0645\u0642\u062f\u0627\u0631 \u062a\u0646\u0638\u06cc\u0645 \u0645\u06cc \u06a9\u0646\u06cc\u0645 <code>schedule<\/code>:<\/p>\n<pre><code class=\"hljs\">model = keras.applications.EfficientNetV2B0(weights=<span class=\"hljs-literal\">None<\/span>, \n                                            classes=n_classes, \n                                            input_shape=(<span class=\"hljs-number\">224<\/span>, <span class=\"hljs-number\">224<\/span>, <span class=\"hljs-number\">3<\/span>))\n  \nmodel.<span class=\"hljs-built_in\">compile<\/span>(loss=<span 
class=\"hljs-string\">\"sparse_categorical_crossentropy\"<\/span>,\n                  optimizer=tf.keras.optimizers.Adam(learning_rate=schedule),\n                  jit_compile=<span class=\"hljs-literal\">True<\/span>,\n                  metrics=[<span class=\"hljs-string\">'accuracy'<\/span>])\n\nhistory3 = model.fit(train_set,\n                    epochs = config[<span class=\"hljs-string\">'EPOCHS'<\/span>],\n                    validation_data=valid_set)\n<\/code><\/pre>\n<p>\u0627\u06af\u0631 \u0645\u06cc \u062e\u0648\u0627\u0647\u06cc\u062f \u0645\u062f\u0644 \u0631\u0627 \u0630\u062e\u06cc\u0631\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0628\u0631\u0646\u0627\u0645\u0647 <code>WarmupCosineDecay<\/code> \u0628\u0627\u06cc\u062f \u0631\u0648\u0634 <code>get_config()<\/code> \u0631\u0627 \u0628\u0627\u0632\u0646\u0648\u06cc\u0633\u06cc (override) \u06a9\u0646\u062f:<\/p>\n<pre><code class=\"hljs\">    <span class=\"hljs-function\"><span class=\"hljs-keyword\">def<\/span> <span class=\"hljs-title\">get_config<\/span>(<span class=\"hljs-params\">self<\/span>):<\/span>\n        config = {\n          <span class=\"hljs-string\">'start_lr'<\/span>: self.start_lr,\n          <span class=\"hljs-string\">'target_lr'<\/span>: self.target_lr,\n          <span class=\"hljs-string\">'warmup_steps'<\/span>: self.warmup_steps,\n          <span class=\"hljs-string\">'total_steps'<\/span>: self.total_steps,\n          <span class=\"hljs-string\">'hold'<\/span>: self.hold\n        }\n        <span class=\"hljs-keyword\">return<\/span> config\n<\/code><\/pre>\n<p>\u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u0647\u0646\u06af\u0627\u0645 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u0645\u062f\u0644\u060c \u0628\u0627\u06cc\u062f <code>WarmupCosineDecay<\/code> \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 \u0634\u06cc \u0633\u0641\u0627\u0631\u0634\u06cc \u067e\u0627\u0633 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code class=\"hljs\">model = 
keras.models.load_model(<span class=\"hljs-string\">'weights.h5'<\/span>, \n                                custom_objects={<span class=\"hljs-string\">'WarmupCosineDecay'<\/span>: WarmupCosineDecay})\n<\/code><\/pre>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u060c \u0646\u06af\u0627\u0647\u06cc \u0628\u0647 \u0634\u0647\u0648\u062f \u067e\u0634\u062a \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0646\u062f\u0627\u062e\u062a\u0647\u200c\u0627\u06cc\u0645 &#8211; \u06cc\u06a9 \u062a\u06a9\u0646\u06cc\u06a9 \u0631\u0627\u06cc\u062c \u0628\u0631\u0627\u06cc \u062f\u0633\u062a\u06a9\u0627\u0631\u06cc \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062f\u0631 \u062d\u06cc\u0646 \u0622\u0645\u0648\u0632\u0634 \u0634\u0628\u06a9\u0647\u200c\u0647\u0627\u06cc \u0639\u0635\u0628\u06cc.<\/p>\n<p>\u0645\u0627 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u0628\u0627 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633\u060c \u0631\u0627\u06cc\u062c\u200c\u062a\u0631\u06cc\u0646 \u0646\u0648\u0639 \u06a9\u0627\u0647\u0634 LR \u0647\u0645\u0631\u0627\u0647 \u0628\u0627 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646\u060c \u0627\u062c\u0631\u0627 \u06a9\u0631\u062f\u0647\u200c\u0627\u06cc\u0645.  
\u0634\u0645\u0627 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0647\u0631 \u062a\u0627\u0628\u0639 \u062f\u06cc\u06af\u0631\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0647\u0634 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u06a9\u0646\u06cc\u062f\u060c \u06cc\u0627 \u0627\u0635\u0644\u0627\u064b \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u06a9\u0627\u0647\u0634 \u0646\u062f\u0647\u06cc\u062f &#8211; \u0622\u0646 \u0631\u0627 \u0628\u0647 \u0633\u0627\u06cc\u0631 \u0641\u0631\u0627\u062e\u0648\u0627\u0646\u200c\u0647\u0627 \u0648\u0627\u06af\u0630\u0627\u0631 \u06a9\u0646\u06cc\u062f <code>ReduceLROnPlateau()<\/code>.  \u0645\u0627 \u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u06cc\u06a9 Keras Callback\u060c \u0648 \u0647\u0645\u0686\u0646\u06cc\u0646 \u06cc\u06a9 \u0628\u0631\u0646\u0627\u0645\u0647 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632 Keras \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645 \u0648 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u062f\u0631 \u0637\u0648\u0644 \u062f\u0648\u0631\u0647 \u0647\u0627 \u062a\u0631\u0633\u06cc\u0645 \u06a9\u0631\u062f\u0647 \u0627\u06cc\u0645.<\/p>\n<\/div>\n<p><script>\n                        !function(f,b,e,v,n,t,s)\n                        {if(f.fbq)return;n=f.fbq=function(){n.callMethod?\n                        n.callMethod.apply(n,arguments):n.queue.push(arguments)};\n                        if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';\n                        n.queue=[];t=b.createElement(e);t.async=!0;\n                        t.src=v;s=b.getElementsByTagName(e)[0];\n                        s.parentNode.insertBefore(t,s)}(window, document,'script',\n                        
'https:\/\/connect.facebook.net\/en_US\/fbevents.js');\n                        fbq('init', '525232124909042');\n                        fbq('track', 'PageView');\n                    <\/script>    (\u0628\u0631\u0686\u0633\u0628\u200c\u0647\u0627 \u0628\u0647 \u062a\u0631\u062c\u0645\u0647)# python<br \/>\n<br \/><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-01-03 11:14:03<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;13890&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;\u06af\u0631\u0645 \u06a9\u0631\u062f\u0646 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0628\u0627 \u0648\u0627\u067e\u0627\u0634\u06cc \u06a9\u0633\u06cc\u0646\u0648\u0633 \u062f\u0631 Keras\\\/TensorFlow&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" 
style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time 
rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 7<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u06cc\u06a9 \u0641\u0631\u0627\u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u0647\u0645 \u062f\u0631 \u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0627\u0633\u062a &#8211; \u0648 \u0645\u0633\u062a\u0642\u06cc\u0645\u0627\u064b \u0622\u0646 \u0631\u0627 \u062f\u06cc\u06a9\u062a\u0647 \u0645\u06cc \u06a9\u0646\u062f \u062f\u0631\u062c\u0647 \u06a9\u0647 \u0628\u0647\u200c\u0631\u0648\u0632\u0631\u0633\u0627\u0646\u06cc\u200c\u0647\u0627\u06cc\u06cc \u0628\u0631\u0627\u06cc \u0648\u0632\u0646\u200c\u0647\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u06a9\u0647 \u062a\u062e\u0645\u06cc\u0646 \u0632\u062f\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u0628\u0631\u062e\u06cc \u0627\u0632 \u0639\u0645\u0644\u06a9\u0631\u062f\u0647\u0627\u06cc \u062a\u0644\u0641\u0627\u062a \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0631\u0627 \u0628\u0647 \u062d\u062f\u0627\u0642\u0644 \u0628\u0631\u0633\u0627\u0646\u062f. 
\u062f\u0631 SGD: $$weight_{t+1} = weight_t - lr * \\frac{derror}{dweight_t}$$ \u0628\u0627 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0632 0\u060c \u0648\u0632\u0646 [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":13891,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,620],"tags":[],"class_list":["post-13890","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-programming"],"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/13890","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=13890"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/13890\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/13891"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=13890"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=13890"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=13890"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}