- kafka、storm、zookeeper部署
1、 安装环境 zookeeper-3.4.8,apache-storm-0.9.3,jdk1.8.0_91,kafka_2.10-0.10.0.0 2、 配置kafka # The id of the broker. This must be set to a unique integer for each broker. broker.id=0 # The port the socket server listens on port=9092 # A comma separated list of directories under which to store log files log.dirs=/home/map/tools/kafka/log/kafka/kafka-logs num.partitions=2 # The minimum age of a log file to be eligible for deletion log.retention.hours=168 log.segment.bytes=1073741824 # to the retention policies log.retention.check.interval.ms=300000 zookeeper.connect=localhost:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=6000 # log dir log.dirs=/home/map/tools/kafka/log/kafka （注意：log.dirs 在上面出现了两次且取值不同，后一次定义会覆盖前一次——请只保留其中一个）
3、 zookeeper部署 logDir=/home/map/tools/zookeeper/logs/zookeeper/log.log dataDir=/home/map/tools/zookeeper/data/zookeeper # the port at which the clients will connect clientPort=2181 server.1=127.0.0.1:2888:3888 4、 storm配置 storm.zookeeper.servers: - 127.0.0.1 # - "server2" # nimbus.host: "127.0.0.1" storm.local.dir: "/home/map/tools/storm/data/storm" supervisor.slots.ports: - 6700 - 6701 - 6702 - 6703
- 组件连调
zookeeper-3.4.6.jar, zkclient-0.8.jar, scala-library-2.10.6.jar, metrics-core-2.2.0.jar, kafka_2.10-0.10.0.0.jar, kafka-clients-0.10.0.0.jar, kafka-tools-0.10.0.0.jar, jopt-simple-4.9.jar
- 运行守护进程
./zkServer.sh start ./storm nimbus & ./storm supervisor & ./kafka-server-start.sh ../config/server.properties &
- 代码测试
pom <dependencies> <dependency> <groupId>org.apache.storm</groupId> <artifactId>storm-core</artifactId> <version>0.9.3</version> </dependency> <dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka_2.10</artifactId> <version>0.10.0.0</version> </dependency> <dependency> <groupId>org.apache.storm</groupId> <artifactId>storm-kafka</artifactId> <version>0.9.3</version> </dependency> <dependency> <groupId>org.apache.zookeeper</groupId> <artifactId>zookeeper</artifactId> <version>3.4.8</version> <type>pom</type> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.3.1</version> </dependency> </dependencies> <build> <plugins> <plugin> <artifactId>maven-assembly-plugin</artifactId> <version>2.6</version> <configuration> <descriptorRefs> <descriptorRef>jar-with-dependencies</descriptorRef> </descriptorRefs> </configuration> <executions> <execution> <id>make-assembly</id> <phase>package</phase> <goals> <goal>single</goal> </goals> </execution> </executions> </plugin> </plugins> </build>
producer; import java.util.Properties; import java.util.concurrent.TimeUnit; import kafka.javaapi.producer.Producer; import kafka.producer.KeyedMessage; import kafka.producer.ProducerConfig; import kafka.serializer.StringEncoder; public class KafkaProducerTest extends Thread{ private String topic; public KafkaProducerTest(String topic) { super(); this.topic = topic; } @Override public void run() { Producer producer = createProducer(); int i = 0; long start_time = System.currentTimeMillis(); while (true) { producer.send(new KeyedMessage<Integer, String>(topic, "message: " + i++)); if (i % 50000 == 0){ long end_time = System.currentTimeMillis(); System.out.println("product tps:" + 50000/((end_time-start_time)/1000)); start_time = end_time; } // try { // TimeUnit.SECONDS.sleep(1); // } catch (InterruptedException e) { // e.printStackTrace(); // } } } private Producer createProducer() { Properties properties = new Properties(); properties.put("zookeeper.connect", "127.0.0.1:2181");// 声明zk properties.put("serializer.class", StringEncoder.class.getName()); properties.put("metadata.broker.list", "127.0.0.1:9092");// 声明kafka // broker return new Producer<Integer, String>(new ProducerConfig(properties)); } public static void main(String[] args) { new KafkaProducerTest("idoall_testTopic").start();// 使用kafka集群中创建好的主题 test } }
spout; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import kafka.consumer.ConsumerConfig; import kafka.consumer.ConsumerIterator; import kafka.consumer.KafkaStream; import kafka.javaapi.consumer.ConsumerConnector; import backtype.storm.spout.SpoutOutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.IRichSpout; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; public class KafkaSpoutTest implements IRichSpout { private static final long serialVersionUID = 1L; private SpoutOutputCollector collector; private ConsumerConnector consumer; private String topic; public KafkaSpoutTest() { } public KafkaSpoutTest(String topic) { this.topic = topic; } public void nextTuple() { } public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { this.collector = collector; } public void ack(Object msgId) { } public void activate() { consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); Map<String, Integer> topickMap = new HashMap<String, Integer>(); topickMap.put(topic, 1); System.out.println("*********Results********topic:" + topic); Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topickMap); KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0); ConsumerIterator<byte[], byte[]> it = stream.iterator(); while (it.hasNext()) { String value = new String(it.next().message()); SimpleDateFormat formatter = new SimpleDateFormat( "yyyy年MM月dd日 HH:mm:ss SSS"); Date curDate = new Date(System.currentTimeMillis());// 获取当前时间 String str = formatter.format(curDate); System.out.println("storm接收到来自kafka的消息------->" + value); collector.emit(new Values(value, 1, str), value); } } private static ConsumerConfig createConsumerConfig() { Properties props = new Properties(); 
// 设置zookeeper的链接地址 // props.put("zookeeper.connect","m1:2181,m2:2181,s1:2181,s2:2181"); // props.put("zookeeper.connect","192.168.101.23:2181"); props.put("zookeeper.connect", "localhost:2181"); // 设置group id props.put("group.id", "1"); // kafka的group 消费记录是保存在zookeeper上的, 但这个信息在zookeeper上不是实时更新的, 需要有个间隔时间更新 props.put("auto.commit.interval.ms", "1000"); props.put("zookeeper.session.timeout.ms", "10000"); return new ConsumerConfig(props); } public void close() { } public void deactivate() { } public void fail(Object msgId) { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word", "id", "time")); } @Override public Map<String, Object> getComponentConfiguration() { System.out.println("getComponentConfiguration被调用"); topic = "idoall_testTopic"; return null; } }
topology; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import com.baidu.bainuo.storm.spout.KafkaSpoutTest; public class KafkaTopologyTest { public static void main(String[] args) { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("spout", new KafkaSpoutTest(""), 1); builder.setBolt("bolt1", new Bolt1(), 2).shuffleGrouping("spout"); builder.setBolt("bolt2", new Bolt2(), 2).fieldsGrouping("bolt1", new Fields("word")); Map conf = new HashMap(); conf.put(Config.TOPOLOGY_WORKERS, 1); conf.put(Config.TOPOLOGY_DEBUG, true); // Config conf = new Config(); // conf.setDebug(true); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("kafka-storm", conf, builder.createTopology()); // Utils.sleep(1000 * 60 * 5); // local cluster test ... 
// cluster.shutdown(); } public static class Bolt1 extends BaseBasicBolt { public void execute(Tuple input, BasicOutputCollector collector) { try { String msg = input.getString(0); int id = input.getInteger(1); String time = input.getString(2); msg = msg + "bolt1"; System.out.println("对消息加工第1次-------[arg0]:" + msg + "---[arg1]:" + id + "---[arg2]:" + time + "------->" + msg); if (msg != null) { collector.emit(new Values(msg)); } } catch (Exception e) { e.printStackTrace(); } } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } } public static class Bolt2 extends BaseBasicBolt { // Map<String, Integer> counts = new HashMap<String, Integer>(); private AtomicInteger counter = new AtomicInteger(0); long start_time = System.currentTimeMillis(); public void execute(Tuple tuple, BasicOutputCollector collector) { String msg = tuple.getString(0); msg = msg + "bolt2"; System.out.println("对消息加工第2次---------->" + msg); collector.emit(new Values(msg, 1)); if (counter.getAndAdd(1) % 50000 == 0) { long end_time = System.currentTimeMillis(); System.out.println("bolt tps:" + 50000/((end_time-start_time)/1000)); start_time = end_time; } } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word", "count")); } } }
- 调试
执行:storm jar da-realtime-0.0.1-SNAPSHOT.jar com.xxx.xxx.storm.topology.KafkaTopologyTest kafka-storm 在另一个终端运行: ./kafka-console-producer.sh --broker-list localhost:9092 --topic idoall_testTopic 终端信息: message: 1389388 message: 1389389 message: 1389390 message: 1389391 message: 1389392 message: 1389393 message: 1389394 message: 1389395 message: 1389396 message: 1389397 message: 1389398 message: 1389399 message: 1389400 message: 1389401 message: 1389402
- 性能测试
java -cp /home/map/tools/kafka/libs/*:da-realtime-0.0.1-SNAPSHOT.jar com.baidu.bainuo.kafka.producer.KafkaProducerTest 生产数据为单线程,tps:16666 终端信息: product tps:12500 product tps:16666 product tps:16666 product tps:16666 product tps:16666 product tps:16666 product tps:16666 product tps:16666 product tps:16666 product tps:16666 storm消费程序 storm jar da-realtime-0.0.1-SNAPSHOT.jar com.baidu.bainuo.storm.topology.KafkaTopologyTest kafka-storm & tps:7142*4(4个端口) storm接收到来自kafka的消息------->message: 136409 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout default [message: 136409, 1, 2016年06月21日 17:45:26 135] 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout __ack_init [-6374788357877584636 -4155451146738645227 6] storm接收到来自kafka的消息------->message: 136410 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout default [message: 136410, 1, 2016年06月21日 17:45:26 135] 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout __ack_init [3880443609814161207 -5796955115394043660 6] storm接收到来自kafka的消息------->message: 136411 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout default [message: 136411, 1, 2016年06月21日 17:45:26 135] 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout __ack_init [-6993416477125797163 -5144834453070077636 6] storm接收到来自kafka的消息------->message: 136412 32354 [Thread-16-spout] INFO backtype.storm.daemon.task - Emitting: spout default [message: 136412, 1, 2016年06月21日 17:45:26 135]^C [email protected] ~/xxx$ cat kafka.log | grep tps bolt tps:7142 bolt tps:7142