梳理spark提交流程

很久没写 Spark 代码,提交作业的时候遇到了不少问题,先记录下来,方便以后查看。

先启动 spark集群,这里是三台(dfs-yarn-zk-spark)

启动完毕之后,需要关闭master服务 sbin/stop-master.sh

以ip方式启动

./sbin/start-master.sh  -h 192.168.199.120

Spark 集群到这里就准备好了。下面是提交 jar 包的命令:

./bin/spark-submit  --class sparkstreaming_action.wordfreq.main.WordFreq --num-executors 4    --driver-memory 512M --executor-memory 512M --executor-cores 1 --conf spark.default.parallelism=1000 /root/spark/spark/smart.jar
hdfs dfs -put input.txt /root/             # 提交作业之前,先把输入文件上传到 HDFS;它对应代码中读取的 /root/input.txt

代码准备

1. 先准备好 IDEA 开发环境(IDE)

2. 新建 Maven 工程,
然后设置好 Maven 仓库

3. 删除工程自动建立好的 java、test、resource 目录

4. 新增 main.scala,并把 scala 目录标记为源码根目录(Sources Root,即 IDEA 中显示为蓝色的目录)

新建一个 Scala object(WordFreq):

package sparkstreaming_action.wordfreq.main

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Word-frequency job: reads a text file, splits every line on single spaces,
 * counts the occurrences of each token and prints the `(word, count)` pairs
 * on the driver.
 */
object WordFreq {

  /** Input path used when no path is passed on the command line. */
  private val DefaultInputPath = "/root/input.txt"

  /**
   * Entry point invoked by `spark-submit`.
   *
   * @param args optional; `args(0)` overrides the input path. With no
   *             arguments the job reads `/root/input.txt`.
   */
  def main(args: Array[String]): Unit = {
    // Create spark context. The master URL is set in code, so it is used
    // even when spark-submit is called without --master.
    val conf = new SparkConf()
      .setAppName("WordFreq_Spark")
      .setMaster("spark://192.168.199.120:7077")
    val sc = new SparkContext(conf)

    // Always release the context, even when reading or an action fails;
    // otherwise the application keeps its executors until it times out.
    try {
      val txtFile = args.headOption.getOrElse(DefaultInputPath)
      val txtData = sc.textFile(txtFile)

      // cache() only marks the RDD; count() is the action that materialises
      // it, so the word count below reads the cached lines.
      txtData.cache()
      txtData.count()

      val wcData = txtData
        .flatMap(line => line.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)

      // collect() brings every pair to the driver before printing.
      wcData.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.spark_smart</groupId>
    <artifactId>smart</artifactId>
    <version>1.0-SNAPSHOT</version>

            <dependencies>
                <!-- Spark core API; provides the org.apache.spark.SparkConf and
                     SparkContext classes imported by WordFreq. The "_2.11" suffix
                     is the Scala binary version the artifact is built for.
                     NOTE(review): the version presumably has to match the Spark
                     release installed on the cluster; confirm. -->
                <dependency>
                    <groupId>org.apache.spark</groupId>
                    <artifactId>spark-core_2.11</artifactId>
                    <version>2.0.0</version>
                </dependency>
                <!-- log4j 1.x logging library. -->
                <dependency><!-- Log -->
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                    <version>1.2.17</version>
                </dependency>
                <!-- SLF4J binding that routes SLF4J logging calls to log4j 1.2. -->
                <dependency>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                    <version>1.7.12</version>
                </dependency>
            </dependencies>


<!--    <build>-->
<!--        <plugins>-->
<!--            &lt;!&ndash; mixed scala/java compile &ndash;&gt;-->
<!--            <plugin>-->
<!--                <groupId>org.scala-tools</groupId>-->
<!--                <artifactId>maven-scala-plugin</artifactId>-->
<!--                <executions>-->
<!--                    <execution>-->
<!--                        <id>compile</id>-->
<!--                        <goals>-->
<!--                            <goal>compile</goal>-->
<!--                        </goals>-->
<!--                        <phase>compile</phase>-->
<!--                    </execution>-->
<!--                    <execution>-->
<!--                        <id>test-compile</id>-->
<!--                        <goals>-->
<!--                            <goal>testCompile</goal>-->
<!--                        </goals>-->
<!--                        <phase>test-compile</phase>-->
<!--                    </execution>-->
<!--                    <execution>-->
<!--                        <phase>process-resources</phase>-->
<!--                        <goals>-->
<!--                            <goal>compile</goal>-->
<!--                        </goals>-->
<!--                    </execution>-->
<!--                </executions>-->
<!--            </plugin>-->
<!--            <plugin>-->
<!--                <artifactId>maven-compiler-plugin</artifactId>-->
<!--                <configuration>-->
<!--                    <source>1.7</source>-->
<!--                    <target>1.7</target>-->
<!--                </configuration>-->
<!--            </plugin>-->
<!--            &lt;!&ndash; for fatjar &ndash;&gt;-->
<!--            <plugin>-->
<!--                <groupId>org.apache.maven.plugins</groupId>-->
<!--                <artifactId>maven-assembly-plugin</artifactId>-->
<!--                <version>2.2</version>-->
<!--                <configuration>-->
<!--                    <descriptorRefs>-->
<!--                        <descriptorRef>jar-with-dependencies</descriptorRef>-->
<!--                    </descriptorRefs>-->
<!--                </configuration>-->
<!--                <executions>-->
<!--                    <execution>-->
<!--                        <id>assemble-all</id>-->
<!--                        <phase>package</phase>-->
<!--                        <goals>-->
<!--                            <goal>single</goal>-->
<!--                        </goals>-->
<!--                    </execution>-->
<!--                </executions>-->
<!--            </plugin>-->
<!--            <plugin>-->
<!--                <groupId>org.apache.maven.plugins</groupId>-->
<!--                <artifactId>maven-jar-plugin</artifactId>-->
<!--                <configuration>-->
<!--                    <archive>-->
<!--                        <manifest>-->
<!--                            <addClasspath>true</addClasspath>-->
<!--                            <mainClass>sparkstreaming_action.wordfreq.main.WordFreq</mainClass>-->
<!--                        </manifest>-->
<!--                    </archive>-->
<!--                </configuration>-->
<!--            </plugin>-->
<!--        </plugins>-->
<!--    </build>-->
<!--    <repositories>-->
<!--        <repository>-->
<!--            <id>aliyunmaven</id>-->
<!--            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>-->
<!--        </repository>-->
<!--    </repositories>-->

</project>

猜你喜欢

转载自www.cnblogs.com/tangsonghuai/p/11184673.html