{"id":17556,"date":"2024-06-26T14:37:09","date_gmt":"2024-06-26T11:07:09","guid":{"rendered":"https:\/\/rasanegaar.com\/blog\/pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache\/"},"modified":"2024-06-26T14:37:09","modified_gmt":"2024-06-26T11:07:09","slug":"pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache","status":"publish","type":"post","link":"https:\/\/rasanegaar.com\/blog\/pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache\/","title":{"rendered":"PySpark \u0628\u0631\u0627\u06cc \u0645\u0628\u062a\u062f\u06cc\u0627\u0646 \u2013 \u0631\u0648\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0627 Apache Spark \u0648 Python"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\"><p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0633\u0631\u0641\u0635\u0644\u0647\u0627\u06cc \u0645\u0637\u0644\u0628<\/p>\n<\/div><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/rasanegaar.com\/blog\/pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache\/#pyspark_%da%86%db%8c%d8%b3%d8%aa%d8%9f\" >Pyspark \u0686\u06cc\u0633\u062a\u061f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" 
href=\"https:\/\/rasanegaar.com\/blog\/pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache\/#%d8%b1%d9%88%d8%b4_%da%a9%d8%a7%d8%b1_%d8%a8%d8%a7_pyspark\" >\u0631\u0648\u0634 \u06a9\u0627\u0631 \u0628\u0627 Pyspark<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/rasanegaar.com\/blog\/pyspark-%d8%a8%d8%b1%d8%a7%db%8c-%d9%85%d8%a8%d8%aa%d8%af%db%8c%d8%a7%d9%86-%d8%b1%d9%88%d8%b4-%d9%be%d8%b1%d8%af%d8%a7%d8%b2%d8%b4-%d8%af%d8%a7%d8%af%d9%87-%d9%87%d8%a7-%d8%a8%d8%a7-apache\/#%d9%86%d8%aa%db%8c%d8%ac%d9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><\/li><\/ul><\/nav><\/div>\n<span class=\"span-reading-time rt-reading-time\" style=\"display: block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 3<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span><p> <br \/>\n<\/p>\n<section class=\"post-content \" data-test-label=\"post-content\">\n<p>\u0627\u06af\u0631 \u062f\u0631 \u062d\u0627\u0644 \u063a\u0648\u0627\u0635\u06cc \u062f\u0631 \u062f\u0646\u06cc\u0627\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u0647\u0633\u062a\u06cc\u062f\u060c \u0627\u062d\u062a\u0645\u0627\u0644\u0627\u064b \u0628\u0627 \u0627\u0635\u0637\u0644\u0627\u062d PySpark \u0628\u0631\u062e\u0648\u0631\u062f \u06a9\u0631\u062f\u0647 \u0627\u06cc\u062f.<\/p>\n<p>PySpark \u0627\u0628\u0632\u0627\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0648 \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u0631\u0627 \u0622\u0633\u0627\u0646 \u062a\u0631 
\u0645\u06cc \u06a9\u0646\u062f.  \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647\u060c \u0627\u0635\u0648\u0644 \u0627\u0648\u0644\u06cc\u0647 PySpark\u060c \u0645\u0632\u0627\u06cc\u0627\u06cc \u0622\u0646 \u0648 \u0631\u0648\u0634 \u0634\u0631\u0648\u0639 \u06a9\u0627\u0631 \u0628\u0627 \u0622\u0646 \u0631\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f.<\/p>\n<h2 id=\"what-is-pyspark\"><span class=\"ez-toc-section\" id=\"pyspark_%da%86%db%8c%d8%b3%d8%aa%d8%9f\"><\/span>Pyspark \u0686\u06cc\u0633\u062a\u061f<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>PySpark \u06cc\u06a9 API \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc Apache Spark \u0627\u0633\u062a \u06a9\u0647 \u06cc\u06a9 \u0686\u0627\u0631\u0686\u0648\u0628 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0628\u0632\u0631\u06af \u0627\u0633\u062a.<\/p>\n<p>Spark \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0648 \u0648\u0638\u0627\u06cc\u0641 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646\u06cc \u062f\u0631 \u0645\u0642\u06cc\u0627\u0633 \u0628\u0632\u0631\u06af \u0637\u0631\u0627\u062d\u06cc \u0634\u062f\u0647 \u0627\u0633\u062a.  \u0628\u0627 PySpark \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0631\u0646\u0627\u0645\u0647 \u0647\u0627\u06cc Spark \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 Python \u0628\u0646\u0648\u06cc\u0633\u06cc\u062f.<\/p>\n<p>\u06cc\u06a9\u06cc \u0627\u0632 \u062f\u0644\u0627\u06cc\u0644 \u0627\u0635\u0644\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 PySpark \u0633\u0631\u0639\u062a \u0622\u0646 \u0627\u0633\u062a.  
PySpark \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u062f\u0627\u062f\u0647 \u0647\u0627 \u0631\u0627 \u0628\u0633\u06cc\u0627\u0631 \u0633\u0631\u06cc\u0639\u062a\u0631 \u0627\u0632 \u0686\u0627\u0631\u0686\u0648\u0628 \u0647\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0633\u0646\u062a\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u06a9\u0646\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u0628\u0647 \u0627\u06cc\u0646 \u062f\u0644\u06cc\u0644 \u0627\u0633\u062a \u06a9\u0647 Pyspark \u0648\u0638\u0627\u06cc\u0641 \u0631\u0627 \u062f\u0631 \u0686\u0646\u062f\u06cc\u0646 \u0645\u0627\u0634\u06cc\u0646 \u062a\u0648\u0632\u06cc\u0639 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0641\u0648\u0642 \u0627\u0644\u0639\u0627\u062f\u0647 \u06a9\u0627\u0631\u0622\u0645\u062f \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u0645\u0632\u06cc\u062a \u062f\u06cc\u06af\u0631 \u0633\u0647\u0648\u0644\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0633\u062a.  \u0627\u06af\u0631 \u0628\u0627 \u067e\u0627\u06cc\u062a\u0648\u0646 \u0622\u0634\u0646\u0627\u06cc\u06cc \u062f\u0627\u0631\u06cc\u062f\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc PySpark \u0628\u0631\u0627\u06cc \u0634\u0645\u0627 \u0622\u0633\u0627\u0646 \u0627\u0633\u062a.  
\u0627\u0632 \u0633\u06cc\u0646\u062a\u06a9\u0633 \u0648 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0622\u0634\u0646\u0627\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0647 \u0633\u0631\u0639\u062a \u0628\u0631 \u0622\u0646 \u0645\u0633\u0644\u0637 \u0634\u0648\u06cc\u062f.<\/p>\n<p>\u0645\u0642\u06cc\u0627\u0633 \u067e\u0630\u06cc\u0631\u06cc \u06cc\u06a9\u06cc \u062f\u06cc\u06af\u0631 \u0627\u0632 \u0645\u0632\u0627\u06cc\u0627\u06cc \u06a9\u0644\u06cc\u062f\u06cc PySpark \u0627\u0633\u062a.  \u0686\u0647 \u0628\u0627 \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u06a9\u0648\u0686\u06a9 \u06a9\u0627\u0631 \u06a9\u0646\u06cc\u062f \u0648 \u0686\u0647 \u0628\u0627 \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u0639\u0638\u06cc\u0645\u060c PySpark \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0647\u0645\u0647 \u0622\u0646 \u0631\u0627 \u0645\u062f\u06cc\u0631\u06cc\u062a \u06a9\u0646\u062f.<\/p>\n<p>Pyspark \u0627\u0632 \u06cc\u06a9 \u0645\u0627\u0634\u06cc\u0646 \u0628\u0647 \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u0645\u062a\u0634\u06a9\u0644 \u0627\u0632 \u0647\u0632\u0627\u0631\u0627\u0646 \u0645\u0627\u0634\u06cc\u0646 \u0645\u0642\u06cc\u0627\u0633 \u0645\u06cc \u0634\u0648\u062f.  
\u0627\u06cc\u0646 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0639\u0646\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u06a9\u0648\u0686\u06a9 \u0634\u0631\u0648\u0639 \u06a9\u0646\u06cc\u062f \u0648 \u0628\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062e\u0648\u062f \u06af\u0633\u062a\u0631\u0634 \u062f\u0647\u06cc\u062f.<\/p>\n<p>PySpark \u0647\u0645\u0686\u0646\u06cc\u0646 \u0628\u0627 \u0633\u0627\u06cc\u0631 \u0627\u0628\u0632\u0627\u0631\u0647\u0627\u06cc \u06a9\u0644\u0627\u0646 \u062f\u0627\u062f\u0647 \u0645\u0627\u0646\u0646\u062f Hadoop \u0648 Apache Hive \u0628\u0647 \u062e\u0648\u0628\u06cc \u0627\u062f\u063a\u0627\u0645 \u0645\u06cc \u0634\u0648\u062f.  \u0627\u06cc\u0646 \u0627\u0645\u0631 \u0622\u0646 \u0631\u0627 \u0628\u0647 \u06cc\u06a9 \u0627\u0646\u062a\u062e\u0627\u0628 \u0647\u0645\u0647 \u06a9\u0627\u0631\u0647 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0631\u0647\u0627\u06cc \u0645\u0647\u0646\u062f\u0633\u06cc \u062f\u0627\u062f\u0647 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<h2 id=\"how-to-work-with-pyspark\"><span class=\"ez-toc-section\" id=\"%d8%b1%d9%88%d8%b4_%da%a9%d8%a7%d8%b1_%d8%a8%d8%a7_pyspark\"><\/span>\u0631\u0648\u0634 \u06a9\u0627\u0631 \u0628\u0627 Pyspark<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062d\u0627\u0644\u0627 \u0628\u06cc\u0627\u06cc\u06cc\u062f \u062f\u0631 \u0645\u0648\u0631\u062f \u0634\u0631\u0648\u0639 \u06a9\u0627\u0631 \u0628\u0627 PySpark \u0635\u062d\u0628\u062a \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u0642\u0628\u0644 \u0627\u0632 \u0634\u0631\u0648\u0639\u060c \u0628\u0627\u06cc\u062f \u067e\u0627\u06cc\u062a\u0648\u0646 \u0648 \u062c\u0627\u0648\u0627 \u0631\u0627 \u0631\u0648\u06cc \u0633\u06cc\u0633\u062a\u0645 \u062e\u0648\u062f \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f.  \u0634\u0645\u0627 \u0647\u0645\u0686\u0646\u06cc\u0646 \u0628\u0627\u06cc\u062f Apache Spark 
\u0631\u0627 \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f. \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646 \u0631\u0627 \u0627\u0632 \u0648\u0628 \u0633\u0627\u06cc\u062a \u0631\u0633\u0645\u06cc Spark \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0646\u06cc\u062f. <\/p>\n<p>\u067e\u0633 \u0627\u0632 \u0627\u06cc\u062c\u0627\u062f \u0627\u06cc\u0646 \u067e\u06cc\u0634 \u0646\u06cc\u0627\u0632\u0647\u0627\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f PySpark \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <code>pip<\/code>\u060c \u0646\u0635\u0628 \u06a9\u0646\u0646\u062f\u0647 \u0628\u0633\u062a\u0647 \u067e\u0627\u06cc\u062a\u0648\u0646\u060c \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f. <\/p>\n<pre><code>pip install pyspark<\/code><\/pre>\n<p>\u067e\u0633 \u0627\u0632 \u0646\u0635\u0628 PySpark\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u0622\u0646 \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f.<\/p>\n<p>\u0634\u0645\u0627 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u06cc\u06a9 \u062c\u0644\u0633\u0647 Spark \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f\u060c \u06a9\u0647 \u0646\u0642\u0637\u0647 \u0648\u0631\u0648\u062f \u0628\u0631\u0627\u06cc \u0647\u0631 \u0628\u0631\u0646\u0627\u0645\u0647 Spark \u0627\u0633\u062a.  
\u0627\u0632 \u0622\u0646\u062c\u0627\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062e\u0648\u062f \u0631\u0627 \u062f\u0631 \u06cc\u06a9 DataFrame \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0646\u06cc\u062f.<\/p>\n<p>DataFrame \u0645\u062c\u0645\u0648\u0639\u0647 \u0627\u06cc \u062a\u0648\u0632\u06cc\u0639 \u0634\u062f\u0647 \u0627\u0632 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0627\u0633\u062a \u06a9\u0647 \u062f\u0631 \u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u0646\u0627\u0645\u06af\u0630\u0627\u0631\u06cc \u0634\u062f\u0647 \u0633\u0627\u0632\u0645\u0627\u0646\u062f\u0647\u06cc \u0634\u062f\u0647 \u0627\u0646\u062f.  DataFrames \u0634\u0628\u06cc\u0647 \u062c\u062f\u0627\u0648\u0644 \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u067e\u0627\u06cc\u06af\u0627\u0647 \u062f\u0627\u062f\u0647 \u0627\u0633\u062a \u0648 \u062f\u0633\u062a\u06a9\u0627\u0631\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0634\u0645\u0627 \u0631\u0627 \u0622\u0633\u0627\u0646 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0639\u0645\u0644\u06cc\u0627\u062a \u0645\u062e\u062a\u0644\u0641\u06cc \u0631\u0627 \u0631\u0648\u06cc DataFrames \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f\u060c \u0645\u0627\u0646\u0646\u062f \u0641\u06cc\u0644\u062a\u0631 \u06a9\u0631\u062f\u0646\u060c \u06af\u0631\u0648\u0647 \u0628\u0646\u062f\u06cc \u0648 \u062a\u062c\u0645\u06cc\u0639 \u062f\u0627\u062f\u0647 \u0647\u0627.  
PySpark \u0637\u06cc\u0641 \u06af\u0633\u062a\u0631\u062f\u0647 \u0627\u06cc \u0627\u0632 \u062a\u0648\u0627\u0628\u0639 \u0631\u0627 \u0628\u0631\u0627\u06cc \u06a9\u0645\u06a9 \u0628\u0647 \u0634\u0645\u0627 \u062f\u0631 \u0627\u0646\u062c\u0627\u0645 \u0627\u06cc\u0646 \u0648\u0638\u0627\u06cc\u0641 \u0641\u0631\u0627\u0647\u0645 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0627\u06cc\u0646\u06a9\u0647 \u0637\u0639\u0645 PySpark \u0631\u0627 \u0628\u0686\u0634\u06cc\u062f\u060c \u0628\u0647 \u06cc\u06a9 \u0645\u062b\u0627\u0644 \u0633\u0627\u062f\u0647 \u0646\u06af\u0627\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.<\/p>\n<p>\u0641\u0631\u0636 \u06a9\u0646\u06cc\u062f \u06cc\u06a9 \u0641\u0627\u06cc\u0644 CSV \u0628\u0627 \u0645\u0642\u062f\u0627\u0631\u06cc \u062f\u0627\u062f\u0647 \u062f\u0627\u0631\u06cc\u062f.  \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u06cc\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0631\u0627 \u062f\u0631 \u06cc\u06a9 DataFrame \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0646\u06cc\u062f \u0648 \u0639\u0645\u0644\u06cc\u0627\u062a \u0627\u0635\u0644\u06cc \u0631\u0627 \u0631\u0648\u06cc \u0622\u0646 \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f.<\/p>\n<p>\u0627\u0628\u062a\u062f\u0627 \u06cc\u06a9 \u062c\u0644\u0633\u0647 Spark \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code>from pyspark.sql import SparkSession\nspark = SparkSession.builder.appName(\"example\").getOrCreate()<\/code><\/pre>\n<p>\u0633\u067e\u0633\u060c \u0641\u0627\u06cc\u0644 CSV \u062e\u0648\u062f \u0631\u0627 \u062f\u0631 \u06cc\u06a9 DataFrame \u0628\u0627\u0631\u06af\u0630\u0627\u0631\u06cc \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code>df = spark.read.csv(\"path\/to\/your\/file.csv\", header=True, inferSchema=True)<\/code><\/pre>\n<p>\u0627\u06a9\u0646\u0648\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0639\u0645\u0644\u06cc\u0627\u062a 
\u0631\u0627 \u0631\u0648\u06cc \u0627\u06cc\u0646 DataFrame \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f.  \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u0645\u062b\u0627\u0644\u060c \u0628\u0631\u0627\u06cc \u0641\u06cc\u0644\u062a\u0631 \u06a9\u0631\u062f\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc\u06cc \u06a9\u0647 \u06cc\u06a9 \u0633\u062a\u0648\u0646 \u062e\u0627\u0635 \u062f\u0627\u0631\u0627\u06cc \u0645\u0642\u062f\u0627\u0631 \u0645\u0634\u062e\u0635\u06cc \u0627\u0633\u062a\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0627\u0632 \u06a9\u062f \u0632\u06cc\u0631 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f:<\/p>\n<pre><code>filtered_df = df.filter(df[\"column_name\"] == \"value\")<\/code><\/pre>\n<p>\u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u062f\u0627\u062f\u0647 \u0647\u0627 \u0631\u0627 \u0628\u0631 \u0627\u0633\u0627\u0633 \u06cc\u06a9 \u0633\u062a\u0648\u0646 \u06af\u0631\u0648\u0647 \u0628\u0646\u062f\u06cc \u06a9\u0646\u06cc\u062f \u0648 \u0645\u0642\u0627\u062f\u06cc\u0631 \u062a\u062c\u0645\u06cc\u0639\u06cc \u0631\u0627 \u0645\u062d\u0627\u0633\u0628\u0647 \u06a9\u0646\u06cc\u062f\u060c \u0645\u0627\u0646\u0646\u062f \u0645\u06cc\u0627\u0646\u06af\u06cc\u0646 \u0645\u0642\u062f\u0627\u0631 \u06cc\u06a9 \u0633\u062a\u0648\u0646 \u062f\u06cc\u06af\u0631:<\/p>\n<pre><code>grouped_df = df.groupBy(\"column_name\").agg({\"another_column\": \"avg\"})<\/code><\/pre>\n<p>\u0627\u06cc\u0646\u0647\u0627 \u062a\u0646\u0647\u0627 \u0686\u0646\u062f \u0646\u0645\u0648\u0646\u0647 \u0627\u0632 \u06a9\u0627\u0631\u0647\u0627\u06cc\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0628\u0627 PySpark \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u06cc\u062f.  
\u0627\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0628\u0633\u06cc\u0627\u0631 \u0642\u062f\u0631\u062a\u0645\u0646\u062f \u0627\u0633\u062a \u0648 \u0639\u0645\u0644\u06a9\u0631\u062f\u0647\u0627\u06cc \u0632\u06cc\u0627\u062f\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u06a9\u0645\u06a9 \u0628\u0647 \u0634\u0645\u0627 \u062f\u0631 \u067e\u0631\u062f\u0627\u0632\u0634 \u0648 \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062e\u0648\u062f \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc \u062f\u0647\u062f.<\/p>\n<h2 id=\"conclusion\"><span class=\"ez-toc-section\" id=\"%d9%86%d8%aa%db%8c%d8%ac%d9%87\"><\/span>\u0646\u062a\u06cc\u062c\u0647<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u0646\u062a\u06cc\u062c\u0647\u060c PySpark \u06cc\u06a9 \u0627\u0628\u0632\u0627\u0631 \u0641\u0648\u0642 \u0627\u0644\u0639\u0627\u062f\u0647 \u0628\u0631\u0627\u06cc \u0647\u0631 \u06a9\u0633\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0627 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u0633\u0631\u06cc\u0639\u060c \u0622\u0633\u0627\u0646 \u0628\u0631\u0627\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647\u060c \u0645\u0642\u06cc\u0627\u0633 \u067e\u0630\u06cc\u0631 \u0627\u0633\u062a \u0648 \u0628\u0647 \u062e\u0648\u0628\u06cc \u0628\u0627 \u0633\u0627\u06cc\u0631 \u0627\u0628\u0632\u0627\u0631\u0647\u0627\u06cc \u06a9\u0644\u0627\u0646 \u062f\u0627\u062f\u0647 \u0627\u062f\u063a\u0627\u0645 \u0645\u06cc \u0634\u0648\u062f. 
<\/p>\n<p>\u0628\u0627 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc PySpark\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u062a\u0645\u0627\u0645 \u067e\u062a\u0627\u0646\u0633\u06cc\u0644 Apache Spark \u0631\u0627 \u0628\u0627\u0632 \u06a9\u0646\u06cc\u062f \u0648 \u0645\u0647\u0627\u0631\u062a \u0647\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u062e\u0648\u062f \u0631\u0627 \u0628\u0647 \u0633\u0637\u062d \u0628\u0627\u0644\u0627\u062a\u0631\u06cc \u0628\u0628\u0631\u06cc\u062f. <\/p>\n<p>\u0628\u0646\u0627\u0628\u0631\u0627\u06cc\u0646\u060c \u067e\u06cc\u0634 \u0628\u0631\u0648\u06cc\u062f \u0648 PySpark \u0631\u0627 \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u062f.  \u0627\u0632 \u0627\u06cc\u0646\u06a9\u0647 \u0686\u0642\u062f\u0631 \u0645\u06cc \u062a\u0648\u0627\u0646\u062f \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u062f \u0634\u06af\u0641\u062a \u0632\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u062f \u0634\u062f.<\/p>\n<p>\u0627\u0645\u06cc\u062f\u0648\u0627\u0631\u06cc\u0645 \u0627\u0632 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647 \u0644\u0630\u062a \u0628\u0631\u062f\u0647 \u0628\u0627\u0634\u06cc\u062f.  
\u0628\u0631\u0627\u06cc \u0645\u0642\u0627\u0644\u0627\u062a \u0628\u06cc\u0634\u062a\u0631 \u062f\u0631 \u0632\u0645\u06cc\u0646\u0647 \u0647\u0648\u0634 \u0645\u0635\u0646\u0648\u0639\u06cc\u060c \u0628\u0647 turingtalks.ai \u0645\u0631\u0627\u062c\u0639\u0647 \u06a9\u0646\u06cc\u062f.<\/p>\n<\/section>\n<p><br \/>\n<br \/>\u0645\u0646\u062a\u0634\u0631 \u0634\u062f\u0647 \u062f\u0631 1403-06-26 14:37:06<br \/>\n<\/p>\n\n\n<div class=\"kk-star-ratings kksr-auto kksr-align-center kksr-valign-bottom\"\n    data-payload='{&quot;align&quot;:&quot;center&quot;,&quot;id&quot;:&quot;17556&quot;,&quot;slug&quot;:&quot;default&quot;,&quot;valign&quot;:&quot;bottom&quot;,&quot;ignore&quot;:&quot;&quot;,&quot;reference&quot;:&quot;auto&quot;,&quot;class&quot;:&quot;&quot;,&quot;count&quot;:&quot;0&quot;,&quot;legendonly&quot;:&quot;&quot;,&quot;readonly&quot;:&quot;&quot;,&quot;score&quot;:&quot;0&quot;,&quot;starsonly&quot;:&quot;&quot;,&quot;best&quot;:&quot;5&quot;,&quot;gap&quot;:&quot;5&quot;,&quot;greet&quot;:&quot;\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628&quot;,&quot;legend&quot;:&quot;0\\\/5 (0 \u0631\u0627\u06cc)&quot;,&quot;size&quot;:&quot;30&quot;,&quot;title&quot;:&quot;PySpark \u0628\u0631\u0627\u06cc \u0645\u0628\u062a\u062f\u06cc\u0627\u0646 \u2013 \u0631\u0648\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0627 Apache Spark \u0648 Python&quot;,&quot;width&quot;:&quot;0&quot;,&quot;_legend&quot;:&quot;{score}\\\/{best} ({count} \u0631\u0627\u06cc)&quot;,&quot;font_factor&quot;:&quot;1.25&quot;}'>\n            \n<div class=\"kksr-stars\">\n    \n<div class=\"kksr-stars-inactive\">\n            <div class=\"kksr-star\" data-star=\"1\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"2\" style=\"padding-left: 5px\">\n 
           \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"3\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"4\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" data-star=\"5\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n    \n<div class=\"kksr-stars-active\" style=\"width: 0px;\">\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n            <div class=\"kksr-star\" style=\"padding-left: 5px\">\n            \n\n<div class=\"kksr-icon\" style=\"width: 30px; height: 30px;\"><\/div>\n        <\/div>\n    <\/div>\n<\/div>\n                \n\n<div class=\"kksr-legend\" style=\"font-size: 24px;\">\n            <span class=\"kksr-muted\">\u0627\u0645\u062a\u06cc\u0627\u0632 \u0634\u0645\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0645\u0637\u0644\u0628<\/span>\n    <\/div>\n    <\/div>\n","protected":false},"excerpt":{"rendered":"<p><span class=\"span-reading-time rt-reading-time\" style=\"display: 
block;\"><span class=\"rt-label rt-prefix\">\u0632\u0645\u0627\u0646 \u0644\u0627\u0632\u0645 \u0628\u0631\u0627\u06cc \u0645\u0637\u0627\u0644\u0639\u0647: <\/span> <span class=\"rt-time\"> 3<\/span> <span class=\"rt-label rt-postfix\">\u062f\u0642\u06cc\u0642\u0647<\/span><\/span>\u0627\u06af\u0631 \u062f\u0631 \u062d\u0627\u0644 \u063a\u0648\u0627\u0635\u06cc \u062f\u0631 \u062f\u0646\u06cc\u0627\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u0647\u0633\u062a\u06cc\u062f\u060c \u0627\u062d\u062a\u0645\u0627\u0644\u0627\u064b \u0628\u0627 \u0627\u0635\u0637\u0644\u0627\u062d PySpark \u0628\u0631\u062e\u0648\u0631\u062f \u06a9\u0631\u062f\u0647 \u0627\u06cc\u062f. PySpark \u0627\u0628\u0632\u0627\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0648 \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af \u0631\u0627 \u0622\u0633\u0627\u0646 \u062a\u0631 \u0645\u06cc \u06a9\u0646\u062f. \u062f\u0631 \u0627\u06cc\u0646 \u0645\u0642\u0627\u0644\u0647\u060c \u0627\u0635\u0648\u0644 \u0627\u0648\u0644\u06cc\u0647 PySpark\u060c \u0645\u0632\u0627\u06cc\u0627\u06cc \u0622\u0646 \u0648 \u0631\u0648\u0634 \u0634\u0631\u0648\u0639 \u06a9\u0627\u0631 \u0628\u0627 \u0622\u0646 \u0631\u0627 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u06cc\u062f. 
Pyspark \u0686\u06cc\u0633\u062a\u061f PySpark [&hellip;]<\/p>\n","protected":false},"author":6,"featured_media":17557,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1743,1686],"tags":[1749,909,5257,1748,1928,5258,1844,1926,1919,1779,1814,1744,2295,1959,1796,2123,2002,1957,1765],"class_list":{"0":"post-17556","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","6":"hentry","7":"category-python","8":"category-ai","9":"tag-python","10":"tag-apache","11":"tag-pyspark","12":"tag-python-hosting","13":"tag-python-","14":"tag-spark","15":"tag-1844","16":"tag-1926","17":"tag----python","18":"tag-1779","19":"tag-1814","20":"tag-1744","21":"tag-2295","22":"tag-1959","23":"tag-1796","24":"tag-2123","26":"tag-1957","27":"tag-1765"},"acf":[],"_links":{"self":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/17556","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/comments?post=17556"}],"version-history":[{"count":0,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/posts\/17556\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media\/17557"}],"wp:attachment":[{"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/media?parent=17556"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/categories?post=17556"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rasanegaar.com\/blog\/wp-json\/wp\/v2\/tags?post=17556"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templ
ated":true}]}}