Event Stream System (1) Basic Setup

Install ZooKeeper 3.4.10, the latest stable version
>wget http://apache.claz.org/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz
>tar zxvf zookeeper-3.4.10.tar.gz
>sudo ln -s ~/tool/zookeeper-3.4.10 /opt/zookeeper-3.4.10
>sudo ln -s /opt/zookeeper-3.4.10 /opt/zookeeper

Add the ZooKeeper bin directory to PATH, then create the configuration from the sample:

>cp conf/zoo_sample.cfg conf/zoo.cfg

Start the Service
>zkServer.sh start conf/zoo.cfg
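
To confirm it is running:
>zkServer.sh status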

Connect to the Server
>zkCli.sh -server localhost:2181
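
To double-check from code instead of zkCli, a minimal Java sketch like the one below opens a session and lists the root znodes. It assumes the org.apache.zookeeper:zookeeper client jar is on the classpath; the class name is just a placeholder.

import java.util.List;
import java.util.concurrent.CountDownLatch;

import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

public class ZkSmokeTest {
    public static void main(String[] args) throws Exception {
        CountDownLatch connected = new CountDownLatch(1);
        // 3000 ms session timeout; the watcher fires once the session is established
        ZooKeeper zk = new ZooKeeper("localhost:2181", 3000, event -> {
            if (event.getState() == Watcher.Event.KeeperState.SyncConnected) {
                connected.countDown();
            }
        });
        connected.await();
        // List the children of the root znode without setting a watch
        List<String> children = zk.getChildren("/", false);
        System.out.println("znodes under /: " + children);
        zk.close();
    }
}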

Install the latest stable Kafka release, kafka_2.10-0.10.2.0 (Kafka 0.10.2.0 built for Scala 2.10)
>wget http://mirror.olnevhost.net/pub/apache/kafka/0.10.2.0/kafka_2.10-0.10.2.0.tgz
>tar zxvf kafka_2.10-0.10.2.0.tgz
Add to Path

Start the Service
>kafka-server-start.sh config/server.properties

Or start in the background
>nohup kafka-server-start.sh config/server.properties &

Create a topic named test
>bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test

List all the topics
>bin/kafka-topics.sh --list --zookeeper localhost:2181

Produce some messages
>bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test
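
The same thing can be done from Java with the producer API. A minimal sketch, assuming the kafka-clients jar is on the classpath; the class name and messages are made up for illustration.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Broker to bootstrap from, plus String serializers for key and value
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            // Send a few messages to the test topic created above
            for (int i = 0; i < 10; i++) {
                producer.send(new ProducerRecord<>("test", "key-" + i, "hello kafka " + i));
            }
        }
    }
}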

Consume the messages
>bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic test --from-beginning
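
And a rough Java consumer for comparison. Note it uses the newer consumer API (bootstrap.servers) rather than the ZooKeeper-based console consumer above; the group id and class name are arbitrary.

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class TestConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "test-group");
        // Start from the earliest offset the first time this group reads the topic
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("test"));
            while (true) {
                // Poll the broker, waiting up to one second for new records
                ConsumerRecords<String, String> records = consumer.poll(1000);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset=%d, key=%s, value=%s%n", record.offset(), record.key(), record.value());
                }
            }
        }
    }
}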

Spark Streaming
I use the latest Spark release (2.1.0) and start from the official JavaKafkaWordCount sample. Here is the Maven POM:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.sillycat</groupId>
  <artifactId>sillycat-eventstream</artifactId>
  <version>1.0</version>
  <description>sillycat spark stream</description>
  <name>event stream spark</name>
  <packaging>jar</packaging>

  <properties>
    <springframework.version>4.3.7.RELEASE</springframework.version>
    <logging.version>1.7.25</logging.version>
  </properties>

  <dependencies>
    <!-- spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
    <!-- cache -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>21.0</version>
    </dependency>
    <!-- logging -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${logging.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>jcl-over-slf4j</artifactId>
      <version>${logging.version}</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.2.3</version>
    </dependency>
    <!-- spring framework -->
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context</artifactId>
      <version>${springframework.version}</version>
      <exclusions>
        <exclusion>
          <groupId>commons-logging</groupId>
          <artifactId>commons-logging</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <!-- apache -->
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.5</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.5</version>
    </dependency>
    <!-- testing -->
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>${springframework.version}</version>
      <scope>test</scope>
      <exclusions>
        <exclusion>
          <groupId>commons-logging</groupId>
          <artifactId>commons-logging</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.6.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4.1</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <mainClass>com.sillycat.sillycateventstream.ExecutorApp</mainClass>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <execution>
            <id>assemble-all</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.0.0</version>
        <executions>
          <execution>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer
                  implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>META-INF/spring.handlers</resource>
                </transformer>
                <transformer
                  implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>META-INF/spring.schemas</resource>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
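
Build the runnable jar; with the assembly plugin configured above, the artifact should show up as target/sillycat-eventstream-1.0-jar-with-dependencies.jar (the shade plugin additionally rewrites the main jar with the Spring resource transformers).
>mvn clean package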

Sample code, adapted from the JavaKafkaWordCount example
package com.sillycat.sillycateventstream.apps;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import scala.Tuple2;

/**
 * Consumes messages from one or more topics in Kafka and does wordcount.
 *
 * Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>
 *   <zkQuorum> is a list of one or more zookeeper servers that make quorum
 *   <group> is the name of kafka consumer group
 *   <topics> is a list of one or more kafka topics to consume from
 *   <numThreads> is the number of threads the kafka consumer should use
 *
 * To run this example:
 *   `$ bin/run-example org.apache.spark.examples.streaming.JavaKafkaWordCount \
 *      zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1`
 */
public class JavaKafkaWordCount implements Serializable{
   
    private static final long serialVersionUID = -4598672873749563084L;

    private static final Pattern SPACE = Pattern.compile(" ");

    private JavaKafkaWordCount() {
    }

    public static void main(String[] args) throws Exception {

        SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount").setMaster("local[2]");
        // Create the context with a 10-second batch interval
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(10 * 1000));

        int numThreads = 1;
        Map<String, Integer> topicMap = new HashMap<>();
        topicMap.put("test", numThreads);

        JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, "fr-stage-consumer:2181",
                "spark-group", topicMap);

        // Keep only the message value of each (key, value) pair
        JavaDStream<String> lines = messages.map(x -> x._2());

        JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());

        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1)).reduceByKey((i1, i2) -> i1 + i2);

        wordCounts.print();
       
        jssc.start();
        jssc.awaitTermination();
    }

}
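
Since the code hard-codes local[2] as the master, one way to run it is spark-submit against the assembly jar built above; the jar name assumes the default Maven finalName.
>bin/spark-submit --class com.sillycat.sillycateventstream.apps.JavaKafkaWordCount target/sillycat-eventstream-1.0-jar-with-dependencies.jar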


Cassandra
>wget http://www.gtlib.gatech.edu/pub/apache/cassandra/3.10/apache-cassandra-3.10-bin.tar.gz
Unpack it, place it under /opt/cassandra, and add the bin directory to PATH

Command to start
>cassandra -Dcassandra.config="file:///opt/cassandra/conf/cassandra.yaml"
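
To verify the node from Java, here is a minimal sketch using the DataStax Java driver. It assumes a cassandra-driver-core dependency is added to the POM (it is not there yet); the class name is arbitrary.

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;

public class CassandraSmokeTest {
    public static void main(String[] args) {
        // Connect to the local node and read the server version from the system table
        try (Cluster cluster = Cluster.builder().addContactPoint("127.0.0.1").build();
             Session session = cluster.connect()) {
            Row row = session.execute("SELECT release_version FROM system.local").one();
            System.out.println("Cassandra release_version: " + row.getString("release_version"));
        }
    }
}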




