Kafka + Storm Deployment and Testing

  • Deploying Kafka, Storm, and ZooKeeper
1. Installation environment
zookeeper-3.4.8, apache-storm-0.9.3, jdk1.8.0_91, kafka_2.10-0.10.0.0
2. Kafka configuration (config/server.properties)
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0

# The port the socket server listens on  
port=9092

# A comma-separated list of directories under which to store log files
log.dirs=/home/map/tools/kafka/log/kafka/kafka-logs

num.partitions=2

# The minimum age of a log file to be eligible for deletion
log.retention.hours=168

log.segment.bytes=1073741824

# The interval at which log segments are checked to see if they can be deleted according to the retention policies
log.retention.check.interval.ms=300000

zookeeper.connect=localhost:2181

# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=6000

3. ZooKeeper configuration (conf/zoo.cfg)
# basic time unit in ms (required; 2000 is the conventional value)
tickTime=2000
# transaction log directory (the zoo.cfg key is dataLogDir; "logDir" is not a recognized key)
dataLogDir=/home/map/tools/zookeeper/logs/zookeeper
dataDir=/home/map/tools/zookeeper/data/zookeeper
# the port at which the clients will connect
clientPort=2181
server.1=127.0.0.1:2888:3888
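
The server.1 entry implies a myid file inside dataDir holding that server's id; assuming the dataDir above, something like:
echo 1 > /home/map/tools/zookeeper/data/zookeeper/myid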

4. Storm configuration (conf/storm.yaml)

storm.zookeeper.servers: 
  - 127.0.0.1
#     - "server2"
# 
nimbus.host: "127.0.0.1"

storm.local.dir: "/home/map/tools/storm/data/storm"  
supervisor.slots.ports: 
  - 6700  
  - 6701  
  - 6702  
  - 6703  
  • Wiring the components together
    Copy the relevant jars from kafka/libs into storm/lib:
zookeeper-3.4.6.jar, zkclient-0.8.jar, scala-library-2.10.6.jar, metrics-core-2.2.0.jar, kafka_2.10-0.10.0.0.jar, kafka-clients-0.10.0.0.jar, kafka-tools-0.10.0.0.jar, jopt-simple-4.9.jar

 

  • Starting the daemons
./zkServer.sh start
./storm nimbus &
./storm supervisor &
./kafka-server-start.sh ../config/server.properties &
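
The examples below assume a topic named idoall_testTopic exists. If broker-side auto-creation is disabled, it can be created with the stock script (the partition count here just mirrors the num.partitions=2 setting above):
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 2 --topic idoall_testTopic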

 

  • Code test

pom.xml (relevant fragment):

<dependencies>
		<dependency>
			<groupId>org.apache.storm</groupId>
			<artifactId>storm-core</artifactId>
			<version>0.9.3</version>
		</dependency>

		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka_2.10</artifactId>
			<version>0.10.0.0</version>
		</dependency>

		<dependency>
			<groupId>org.apache.storm</groupId>
			<artifactId>storm-kafka</artifactId>
			<version>0.9.3</version>
		</dependency>

		<dependency>
			<groupId>org.apache.zookeeper</groupId>
			<artifactId>zookeeper</artifactId>
			<version>3.4.8</version>
			<!-- type "pom" pulls only ZooKeeper's pom; the classes themselves
			     arrive transitively via storm-core and kafka -->
			<type>pom</type>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.3.1</version>
		</dependency>

	</dependencies>

	<build>
		<plugins>
			<plugin>
				<artifactId>maven-assembly-plugin</artifactId>
				<version>2.6</version>
				<configuration>
					<descriptorRefs>
						<descriptorRef>jar-with-dependencies</descriptorRef>
					</descriptorRefs>
				</configuration>
				<executions>
					<execution>
						<id>make-assembly</id>
						<phase>package</phase>
						<goals>
							<goal>single</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>
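
One pitfall worth noting (my addition, not from the original write-up): when the jar-with-dependencies is submitted via storm jar, bundling storm-core into the uberjar can clash with the jars already on Storm's classpath (the classic "multiple defaults.yaml found" error). Marking it provided avoids that, at the cost of adding it back manually for plain java -cp runs:

		<dependency>
			<groupId>org.apache.storm</groupId>
			<artifactId>storm-core</artifactId>
			<version>0.9.3</version>
			<!-- supplied by the Storm installation at runtime -->
			<scope>provided</scope>
		</dependency>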

 

Producer (KafkaProducerTest):

import java.util.Properties;
import java.util.concurrent.TimeUnit;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;

public class KafkaProducerTest extends Thread{
	private String topic;

	public KafkaProducerTest(String topic) {
		super();
		this.topic = topic;
	}

	@Override
	public void run() {
		Producer<Integer, String> producer = createProducer();
		int i = 0;
		long start_time = System.currentTimeMillis();
		while (true) {
			producer.send(new KeyedMessage<Integer, String>(topic, "message: " + i++));
			if (i % 50000 == 0){
				long end_time = System.currentTimeMillis();
				System.out.println("product tps:" + 50000/((end_time-start_time)/1000));
				start_time = end_time;
			}
//			try {
//				TimeUnit.SECONDS.sleep(1);
//			} catch (InterruptedException e) {
//				e.printStackTrace();
//			}
		}
	}

	private Producer<Integer, String> createProducer() {
		Properties properties = new Properties();
		properties.put("zookeeper.connect", "127.0.0.1:2181");// ZooKeeper connect string
		properties.put("serializer.class", StringEncoder.class.getName());
		properties.put("metadata.broker.list", "127.0.0.1:9092");// Kafka broker list
		return new Producer<Integer, String>(new ProducerConfig(properties));
	}

	public static void main(String[] args) {
		new KafkaProducerTest("idoall_testTopic").start();// send to the topic created on the Kafka cluster

	}
}
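
The class above uses the legacy Scala producer API (kafka.javaapi.producer.Producer), which still works against a 0.10 broker. For reference, a minimal sketch of the same send on the new Java producer that ships with kafka-clients 0.10 (my addition, not part of the original write-up; the class name is made up):

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class NewApiProducerTest {
	public static void main(String[] args) {
		Properties props = new Properties();
		props.put("bootstrap.servers", "127.0.0.1:9092"); // brokers, not ZooKeeper
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
		for (int i = 0; i < 100; i++) {
			// send() is asynchronous; the client batches behind the scenes
			producer.send(new ProducerRecord<String, String>("idoall_testTopic", "message: " + i));
		}
		producer.close();
	}
}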
Spout (KafkaSpoutTest):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class KafkaSpoutTest implements IRichSpout {
	private static final long serialVersionUID = 1L;
	private SpoutOutputCollector collector;
	private ConsumerConnector consumer;
	private String topic;

	public KafkaSpoutTest() {
	}

	public KafkaSpoutTest(String topic) {
		this.topic = topic;
	}

	public void nextTuple() {
	}

	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector = collector;
	}

	public void ack(Object msgId) {
	}

	public void activate() {
		// NOTE: this spout consumes inside activate() rather than from nextTuple(),
		// which blocks the executor thread; fine for this smoke test, not for production
		consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
		Map<String, Integer> topicMap = new HashMap<String, Integer>();
		topicMap.put(topic, 1);

		System.out.println("*********Results********topic:" + topic);

		Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topicMap);

		KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0);
		ConsumerIterator<byte[], byte[]> it = stream.iterator();
		while (it.hasNext()) {
			String value = new String(it.next().message());
			SimpleDateFormat formatter = new SimpleDateFormat(
					"yyyy-MM-dd HH:mm:ss SSS");
			Date curDate = new Date(System.currentTimeMillis());// current time
			String str = formatter.format(curDate);

			System.out.println("Storm received message from Kafka ------->" + value);

			// emit (word, id, time), anchored with the message itself as msgId
			collector.emit(new Values(value, 1, str), value);
		}
	}

	private static ConsumerConfig createConsumerConfig() {
		Properties props = new Properties();
		// ZooKeeper connect string
		// props.put("zookeeper.connect","m1:2181,m2:2181,s1:2181,s2:2181");
		// props.put("zookeeper.connect","192.168.101.23:2181");
		props.put("zookeeper.connect", "localhost:2181");

		// consumer group id
		props.put("group.id", "1");
		// consumer offsets live in ZooKeeper and are committed periodically,
		// not in real time; this sets the commit interval
		props.put("auto.commit.interval.ms", "1000");
		props.put("zookeeper.session.timeout.ms", "10000");
		return new ConsumerConfig(props);
	}

	public void close() {
	}

	public void deactivate() {
	}

	public void fail(Object msgId) {
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word", "id", "time"));
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		System.out.println("getComponentConfiguration invoked");
		// hack: the topic is hard-coded here because the topology builds the spout
		// with an empty topic name; this relies on Storm calling this method first
		topic = "idoall_testTopic";
		return null;
	}
}
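
The hand-rolled spout above mainly makes the data flow visible; the storm-kafka artifact already declared in the pom ships a ready-made KafkaSpout. A minimal wiring sketch against the 0.9.x storm-kafka API (my sketch; the class name, the "/kafka-spout" zkRoot, and the consumer id are made-up values):

import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

public class KafkaSpoutWiring {
	public static void main(String[] args) {
		// broker metadata is discovered through ZooKeeper
		BrokerHosts hosts = new ZkHosts("127.0.0.1:2181");
		// topic, zkRoot under which consumer offsets are stored, and a consumer id
		SpoutConfig spoutConfig = new SpoutConfig(hosts, "idoall_testTopic", "/kafka-spout", "kafka-storm-id");
		// deserialize each message as a plain string, emitted as a single "str" field
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("spout", new KafkaSpout(spoutConfig), 1);
		// ... attach bolts and submit as in KafkaTopologyTest below
	}
}

Note that this spout emits a single "str" field, so the bolts in the topology below would need to read that field instead of (word, id, time).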
Topology (KafkaTopologyTest):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import com.baidu.bainuo.storm.spout.KafkaSpoutTest;

public class KafkaTopologyTest {
	public static void main(String[] args) {
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("spout", new KafkaSpoutTest(""), 1);
		builder.setBolt("bolt1", new Bolt1(), 2).shuffleGrouping("spout");
		builder.setBolt("bolt2", new Bolt2(), 2).fieldsGrouping("bolt1",
				new Fields("word"));

		Map<String, Object> conf = new HashMap<String, Object>();
		conf.put(Config.TOPOLOGY_WORKERS, 1);
		conf.put(Config.TOPOLOGY_DEBUG, true);
		// Config conf = new Config();
		// conf.setDebug(true);

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-storm", conf, builder.createTopology());

		// Utils.sleep(1000 * 60 * 5); // local cluster test ...
		// cluster.shutdown();

	}

	public static class Bolt1 extends BaseBasicBolt {

		public void execute(Tuple input, BasicOutputCollector collector) {
			try {
				String msg = input.getString(0);
				int id = input.getInteger(1);
				String time = input.getString(2);
				// check for null before appending; after concatenation msg can never be null
				if (msg != null) {
					msg = msg + "bolt1";
					System.out.println("message after pass 1-------[arg0]:" + msg
							+ "---[arg1]:" + id + "---[arg2]:" + time + "------->"
							+ msg);
					collector.emit(new Values(msg));
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}

		public void declareOutputFields(OutputFieldsDeclarer declarer) {
			declarer.declare(new Fields("word"));
		}
	}

	public static class Bolt2 extends BaseBasicBolt {
		// Map<String, Integer> counts = new HashMap<String, Integer>();
		private AtomicInteger counter = new AtomicInteger(0);
		long start_time = System.currentTimeMillis();

		public void execute(Tuple tuple, BasicOutputCollector collector) {
			String msg = tuple.getString(0);
			msg = msg + "bolt2";
			System.out.println("message after pass 2---------->" + msg);
			collector.emit(new Values(msg, 1));
			if (counter.incrementAndGet() % 50000 == 0) {
				long end_time = System.currentTimeMillis();
				// multiply by 1000 first: integer division by a sub-second elapsed time would be 0
				System.out.println("bolt tps:" + 50000L * 1000 / (end_time - start_time));
				start_time = end_time;
			}
		}

		public void declareOutputFields(OutputFieldsDeclarer declarer) {
			declarer.declare(new Fields("word", "count"));
		}
	}
}
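
As written, main() always spins up a LocalCluster, even though the debugging step below launches the class via storm jar with a topology-name argument that main() ignores. A common variant (my sketch, not from the original) honors that argument; it needs import backtype.storm.StormSubmitter plus the checked exceptions it throws declared on main():

		// submit to the real cluster when a name is passed, otherwise run locally
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		} else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("kafka-storm", conf, builder.createTopology());
		}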
  • Debugging
Run: storm jar da-realtime-0.0.1-SNAPSHOT.jar com.xxx.xxx.storm.topology.KafkaTopologyTest kafka-storm
In another terminal, run:
./kafka-console-producer.sh --broker-list localhost:9092 --topic idoall_testTopic

Terminal output:
message: 1389388
message: 1389389
message: 1389390
message: 1389391
message: 1389392
message: 1389393
message: 1389394
message: 1389395
message: 1389396
message: 1389397
message: 1389398
message: 1389399
message: 1389400
message: 1389401
message: 1389402
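
To double-check that messages actually reach the broker independently of Storm, the console consumer shipped with Kafka can be pointed at the same topic:
./kafka-console-consumer.sh --zookeeper localhost:2181 --topic idoall_testTopic --from-beginning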
  • Performance test
java -cp /home/map/tools/kafka/libs/*:da-realtime-0.0.1-SNAPSHOT.jar com.baidu.bainuo.kafka.producer.KafkaProducerTest
The producer runs single-threaded; TPS around 16666.
Terminal output:
product tps:12500
product tps:16666
product tps:16666
product tps:16666
product tps:16666
product tps:16666
product tps:16666
product tps:16666
product tps:16666
product tps:16666

Storm consumer:
storm jar da-realtime-0.0.1-SNAPSHOT.jar com.baidu.bainuo.storm.topology.KafkaTopologyTest kafka-storm &
TPS: 7142 * 4 (one stream per the 4 configured worker ports), roughly 28,500 in aggregate.

Storm received message from Kafka ------->message: 136409
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout default [message: 136409, 1, 2016-06-21 17:45:26 135]
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout __ack_init [-6374788357877584636 -4155451146738645227 6]
Storm received message from Kafka ------->message: 136410
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout default [message: 136410, 1, 2016-06-21 17:45:26 135]
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout __ack_init [3880443609814161207 -5796955115394043660 6]
Storm received message from Kafka ------->message: 136411
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout default [message: 136411, 1, 2016-06-21 17:45:26 135]
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout __ack_init [-6993416477125797163 -5144834453070077636 6]
Storm received message from Kafka ------->message: 136412
32354 [Thread-16-spout] INFO  backtype.storm.daemon.task - Emitting: spout default [message: 136412, 1, 2016-06-21 17:45:26 135]^C
$ cat kafka.log | grep tps
bolt tps:7142
bolt tps:7142

Reposted from yugouai.iteye.com/blog/2306422