Steps to enable Spark dynamic allocation - Original

1. Set Environment Variables

$ export HADOOP_CONF_DIR=/hadoop/hadoop-2.7XX/etc/hadoop
$ export HADOOP_HOME=/hadoop/hadoop-2.7XX
$ export SPARK_HOME=/hadoop/spark-2.4.0-bin-hadoop2.7
$ hds=(`cat ${HADOOP_CONF_DIR}/slaves` 'namenode1' 'namenode2')

2. yarn configuration file

  • back up yarn-site.xml

    $ for i in ${hds[@]}  ; do echo $i ; ssh $i "cp ${HADOOP_CONF_DIR}/yarn-site.xml ${HADOOP_CONF_DIR}/yarn-site.xml.pre_spark_shuffle.bak"  ; done;
    $ for i in ${hds[@]}  ; do echo $i ; ssh $i "ls ${HADOOP_CONF_DIR} | grep pre_spark_shuffle.bak"  ; done;
    
  • modify yarn-site.xml (add the spark_shuffle properties shown in the output below, then verify with grep)

    $ more ${HADOOP_CONF_DIR}/yarn-site.xml | grep -B 1 -A 2 "aux-services"
    
  • output

    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
      <value>org.apache.spark.network.yarn.YarnShuffleService</value>
    </property>
    
  • broadcast yarn-site.xml

     $ for i in ${hds[@]} ; do echo $i ; scp ${HADOOP_CONF_DIR}/yarn-site.xml ${i}:${HADOOP_CONF_DIR}/ ; done ;
     $ for i in ${hds[@]} ; do echo $i ; ssh $i  "cat ${HADOOP_CONF_DIR}/yarn-site.xml | grep -B 1 -A 2 'aux-services' " ; done ;
    
  • Check heapsize

    $ more ${HADOOP_CONF_DIR}/yarn-env.sh | grep "YARN_HEAPSIZE"
    
    • output

    YARN_HEAPSIZE=2000 # 根据实际情况调整
    
  • check yarn class path

    $ more ${HADOOP_CONF_DIR}/yarn-site.xml | grep "yarn.application.classpath"  
    # if nothing is found, the default path $HADOOP_HOME/share/hadoop/yarn/ can be used
    
  • check yarn shuffle jar

    $ find ${SPARK_HOME} -iname "*yarn-shuffle.jar" 
    # expected result: spark-2.4.0-yarn-shuffle.jar
    
  • copy yarn shuffle jar

    $ for i in ${hds[@]} ; do echo $i ; scp `find ${SPARK_HOME} -iname "*yarn-shuffle.jar"` ${i}:$HADOOP_HOME/share/hadoop/yarn/ ; done ;
    $ for i in ${hds[@]}  ; do echo $i ; ssh $i "ls -ltr $HADOOP_HOME/share/hadoop/yarn/ | grep shuffle"  ; done ;
    
  • Restart yarn

      $ bash $HADOOP_HOME/sbin/stop-yarn.sh
      $ bash $HADOOP_HOME/sbin/start-yarn.sh
    
  • check that the YARN manager processes (ResourceManager / NodeManager) are running

    $ for i in ${hds[@]}  ; do echo $i ; ssh $i ". /etc/profile ; jps" | grep -i manager  ; done;
    

Reproduced in: https://www.jianshu.com/p/d2b97ce16e89

Guess you like

Origin blog.csdn.net/weixin_33777877/article/details/91074412