storm高并发PV统计,利用zookeeper锁输出汇总值

汇总型方案:
1、shuffleGrouping下,总PV = pv(单线程结果) * Executor并发数
一个Executor默认运行一个Task;如果设置的Task数大于1,公式应该是:
总PV = pv(单线程结果) * Task数,
同一个Executor下各Task的线程ID相同,taskId不同

优点:简单、计算量小
缺点:稍有误差,但绝大多数场景能接受

优化:
案例PVBolt中每个Task都会输出一个汇总值,实际只需要一个Task输出汇总值,
利用ZooKeeper锁来保证只有一个Task输出汇总值,而且每5秒输出一次


1、pom.xml增加zk
引用
pom.xml中增加ZK:
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.10</version>
</dependency>

引用

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!-- Build descriptor for the Storm PV-counting demo; packaged as a plain jar. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.test</groupId>
  <artifactId>StormMavenProject</artifactId>
  <packaging>jar</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>StormMavenProject</name>
  <url>http://maven.apache.org</url>
  <dependencies>
   
   <!-- NOTE(review): asm, clojure, disruptor, kryo, minlog, objenesis, reflectasm, ring-cors
        and the logging artifacts below look like libraries storm-core itself relies on,
        pinned here explicitly; confirm whether they are still needed next to storm-core. -->
   <dependency>
    <groupId>org.ow2.asm</groupId>
    <artifactId>asm</artifactId>
    <version>5.0.3</version>
   </dependency>
<dependency>
    <groupId>org.clojure</groupId>
    <artifactId>clojure</artifactId>
    <version>1.7.0</version>
</dependency>
<dependency>
    <groupId>com.lmax</groupId>
    <artifactId>disruptor</artifactId>
    <version>3.3.2</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>kryo</artifactId>
    <version>3.0.3</version>
</dependency>
<!-- Logging: Log4j 2 API/core plus SLF4J bridges. -->
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>log4j-over-slf4j</artifactId>
    <version>1.6.6</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-slf4j-impl</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>minlog</artifactId>
    <version>1.3.0</version>
</dependency>
<dependency>
    <groupId>org.objenesis</groupId>
    <artifactId>objenesis</artifactId>
    <version>2.1</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>reflectasm</artifactId>
    <version>1.10.1</version>
</dependency>

<dependency>
    <groupId>javax.servlet</groupId>
    <artifactId>servlet-api</artifactId>
    <version>2.5</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>1.7.21</version>
</dependency>
<!-- Apache Storm core library, version 1.1.0. -->
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-rename-hack</artifactId>
    <version>1.1.0</version>
</dependency>
    <!-- Test-scope only; not part of the packaged jar's runtime classpath. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

<dependency>
    <groupId>ring-cors</groupId>
    <artifactId>ring-cors</artifactId>
    <version>0.1.5</version>
</dependency>


<!-- ZooKeeper client, added for the lock that lets a single bolt task print the PV summary. -->
<dependency>
    <groupId>org.apache.zookeeper</groupId>
    <artifactId>zookeeper</artifactId>
    <version>3.4.10</version>
</dependency>



  </dependencies>
  <build>
    <!-- Fixes the artifact file name to StormMavenProject (no version suffix). -->
    <finalName>StormMavenProject</finalName>
  </build>
</project>


2、创建队列数据源

/**
 * Demo data-source spout.
 *
 * <p>{@link #open} pre-loads 20 synthetic access-log lines of the form
 * {@code host \t session_id \t time} into an in-memory queue, and
 * {@link #nextTuple} emits them one at a time on the single output field
 * {@code "log"}, sleeping 500 ms after every call.
 *
 * <p>Tuples are emitted without a message id.
 */
public class SourceSpout implements IRichSpout{

	private static final long serialVersionUID = 1L;
	
	/** Log lines waiting to be emitted; filled once in {@link #open}. */
	Queue<String> queue = new ConcurrentLinkedQueue<String>();
	
	/** Collector handed over by Storm in {@link #open}. */
	SpoutOutputCollector collector = null;
	
	/** Not referenced anywhere in this class. */
	String str = null;

	/**
	 * Emits the next queued log line, if any, then sleeps 500 ms.
	 *
	 * <p>Once the queue is drained nothing is emitted. Previously the guard
	 * {@code queue.size() >= 0} was always true, so a tuple carrying
	 * {@code null} was emitted on every call after the data ran out.
	 */
	public void nextTuple() {
		// poll() returns null on an empty queue, so it doubles as the emptiness check.
		String line = queue.poll();
		if (line != null) {
			collector.emit(new Values(line));
		}
		try {
			Thread.sleep(500) ;
		} catch (InterruptedException e) {
			// Keep the interrupt visible to the calling thread instead of swallowing it.
			Thread.currentThread().interrupt();
		}
	
	}
	
	/**
	 * Stores the collector and fills the queue with 20 randomly combined log lines.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector collector used by {@link #nextTuple} to emit tuples
	 */
	public void open(Map conf, TopologyContext context,
			SpoutOutputCollector collector) {
		this.collector = collector;
		
		Random random = new Random();
		String[] hosts = { "www.taobao.com" };
		String[] session_id = { "ABYH6Y4V4SCVXTG6DPB4VH9U123", "XXYH6YCGFJYERTT834R52FDXV9U34", "BBYH61456FGHHJ7JL89RG5VV9UYU7",
				"CYYH6Y2345GHI899OFG4V9U567", "VVVYH6Y4V4SFXZ56JIPDPB4V678" };
		String[] time = { "2014-01-07 08:40:50", "2014-01-07 08:40:51", "2014-01-07 08:40:52", "2014-01-07 08:40:53", 
				"2014-01-07 09:40:49", "2014-01-07 10:40:49", "2014-01-07 11:40:49", "2014-01-07 12:40:49" };
		
		for (int i = 0; i < 20; i++) {
			// Bounds come from the arrays themselves so the sample data can be edited safely.
			queue.add(hosts[0]+"\t"+session_id[random.nextInt(session_id.length)]+"\t"+time[random.nextInt(time.length)]);
		}
	}
	
	/** Nothing to release. */
	public void close() {
	}
	
	/** Declares the single output field {@code "log"}. */
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("log"));
	}

	
	/** Returns {@code null}: no component-specific configuration. */
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}
	
	/** Prints the acknowledged message id. */
	public void ack(Object msgId) {
		// String concatenation prints "null" for a null id instead of throwing.
		System.out.println("spout ack:"+msgId);
	}

	
	/** No-op. */
	public void activate() {
		
	}

	
	/** No-op. */
	public void deactivate() {
		
	}

	
	/** Prints the failed message id. */
	public void fail(Object msgId) {
		// String concatenation prints "null" for a null id instead of throwing.
		System.out.println("spout fail:"+msgId);
	}

}


3、编写bolt

/**
 * Counts page views (PV) and lets exactly one task print the estimated total.
 *
 * <p>Every task counts the log lines it receives. The task whose identity
 * ({@code hostAddress:taskId}) is stored in the ephemeral ZooKeeper node
 * {@link #zk_path} prints {@code local count * number of tasks of this bolt}
 * at most once every five seconds. The check only runs when a tuple arrives.
 *
 * <p>The parent path of {@link #zk_path} must already exist in ZooKeeper.
 */
public class PVBolt implements IRichBolt{

	private static final long serialVersionUID = 1L;

	/** ZooKeeper node whose data names the task allowed to print the summary. */
	public static final String zk_path = "/lock/storm/pv";

	/** Minimum time between two summary attempts, in milliseconds. */
	private static final long REPORT_INTERVAL_MS = 5 * 1000;
	
	String logString = null;
	/** This task's identity, {@code hostAddress:taskId}; null until {@link #prepare} succeeds. */
	String lockData = null;
	String session_id = null;
	/** ZooKeeper session; null before {@link #prepare} and after a failed connect or cleanup. */
	ZooKeeper zKeeper = null;
	
	/** Number of log lines counted by this task. */
	long Pv = 0;
	long beginTime = System.currentTimeMillis() ;
	long endTime = 0;

	/** Number of tasks running this bolt; set in {@link #prepare}. */
	int taskCount = 1;
	
	
	/** Closes the ZooKeeper session, if one was opened. */
	public void cleanup() {
		closeQuietly();
	}
	
	
	/**
	 * Counts the incoming log line and, every five seconds, prints the
	 * estimated total PV if this task owns the ZooKeeper lock.
	 *
	 * @param input tuple whose first field is a tab-separated log line, or null
	 */
	public void execute(Tuple input) {
		try {
			endTime = System.currentTimeMillis() ;
			logString = input.getString(0);
			if (logString != null) {
				String[] columns = logString.split("\t");
				// Lines without a session column are skipped rather than throwing,
				// so the report check below still runs for this tuple.
				if (columns.length > 1) {
					session_id = columns[1];
					Pv ++ ;
				}
			}
			// Report at most once every five seconds.
			if (endTime - beginTime >= REPORT_INTERVAL_MS) {
				System.err.println(lockData+" ======================== ");
				if (ownsLock()) {
					// Estimate: local count scaled by the number of tasks of this bolt.
					System.err.println("pv ======================== "+ Pv * taskCount);
				}
				beginTime = System.currentTimeMillis() ;
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		
	}

	
	/**
	 * Records the task count, connects to ZooKeeper and tries to become the
	 * reporting task. On failure the error is printed and the bolt keeps
	 * counting but never prints a summary.
	 *
	 * @param stormConf topology configuration (unused)
	 * @param context   supplies this task's id and the task list of this component
	 * @param collector output collector (unused; this bolt emits nothing)
	 */
	public void prepare(Map stormConf, TopologyContext context,
			OutputCollector collector) {
		// Use the real parallelism instead of a hard-coded multiplier.
		taskCount = context.getComponentTasks(context.getThisComponentId()).size();
		// Start the report interval when the task starts, not when the bolt object was built.
		beginTime = System.currentTimeMillis() ;
		try {
			// Arguments: connect string, session timeout in ms, default watcher.
			zKeeper = new ZooKeeper("192.168.1.201:2181,192.168.1.202:2181",3000,new Watcher(){
				
				public void process(WatchedEvent event) {
					System.out.println("event:"+event.getType());
				}
			});
			
			// Block until the session is connected.
			// NOTE(review): this waits forever if ZooKeeper is unreachable.
			while (zKeeper.getState() != ZooKeeper.States.CONNECTED) {
				Thread.sleep(1000);
			}
			
			InetAddress address = InetAddress.getLocalHost();
			lockData = address.getHostAddress() + ":" +context.getThisTaskId() ;
			System.err.println(lockData+"++++++++++++++++++++++++++++");
			tryAcquireLock();
			
		} catch (Exception e) {
			// Report the failure instead of swallowing it silently.
			e.printStackTrace();
			if (e instanceof InterruptedException) {
				Thread.currentThread().interrupt();
			}
			closeQuietly();
		}
		
	}

	
	/** Declares nothing: this bolt emits no tuples. */
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}

	
	/** Returns {@code null}: no component-specific configuration. */
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

	/**
	 * Tells whether the lock node currently holds this task's identity. If the
	 * node is missing (for example the previous owner's session ended), this
	 * task first tries to claim it.
	 */
	private boolean ownsLock() throws Exception {
		if (zKeeper == null || lockData == null) {
			return false;
		}
		tryAcquireLock();
		byte[] owner = zKeeper.getData(zk_path, false, null);
		return owner != null
				&& lockData.equals(new String(owner, java.nio.charset.StandardCharsets.UTF_8));
	}

	/**
	 * Creates the ephemeral lock node with this task's identity unless it
	 * already exists. Losing the creation race to another task is not an error.
	 */
	private void tryAcquireLock() throws Exception {
		if (zKeeper.exists(zk_path, false) != null) {
			return;
		}
		try {
			zKeeper.create(zk_path, lockData.getBytes(java.nio.charset.StandardCharsets.UTF_8),
					Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
		} catch (Exception e) {
			if (e instanceof InterruptedException) {
				throw e;
			}
			// Another task may have created the node between exists() and create();
			// only propagate the failure if the node is still absent.
			if (zKeeper.exists(zk_path, false) == null) {
				throw e;
			}
		}
	}

	/** Closes the ZooKeeper session if present and clears the handle. */
	private void closeQuietly() {
		if (zKeeper == null) {
			return;
		}
		try {
			zKeeper.close();
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
		} finally {
			zKeeper = null;
		}
	}

}



4、编写topology类


/**
 * Entry point that wires {@code SourceSpout} and {@code PVBolt} together and
 * launches the topology.
 */
public class PvTopo {

	/**
	 * Builds the topology (one spout executor, four bolt executors connected by
	 * shuffle grouping, four workers) and submits it.
	 *
	 * @param args when non-empty, {@code args[0]} is the topology name passed to
	 *             {@code StormSubmitter}; when empty, the topology runs in a
	 *             {@code LocalCluster} under the name {@code "mytopology"}
	 */
	public static void main(String[] args) {

		Map conf = new HashMap();
		conf.put(Config.TOPOLOGY_WORKERS, 4);

		TopologyBuilder topologyBuilder = new TopologyBuilder();
		topologyBuilder.setSpout("spout", new SourceSpout(), 1);
		topologyBuilder.setBolt("bolt", new PVBolt(),4).shuffleGrouping("spout");

		// No arguments: run in-process.
		boolean runLocally = args.length == 0;
		if (runLocally) {
			LocalCluster localCluster = new LocalCluster();
			localCluster.submitTopology("mytopology", conf, topologyBuilder.createTopology());
			return;
		}

		// Otherwise submit under the name given on the command line.
		try {
			StormSubmitter.submitTopology(args[0], conf, topologyBuilder.createTopology());
		} catch (AlreadyAliveException alreadyAlive) {
			alreadyAlive.printStackTrace();
		} catch (InvalidTopologyException invalidTopology) {
			invalidTopology.printStackTrace();
		} catch (AuthorizationException notAuthorized) {
			notAuthorized.printStackTrace();
		}

	}

}


5、启动ZooKeeper、Storm,在master(supervisor)主机上创建ZooKeeper锁节点 /lock/storm/pv 的父路径
引用

进入zk目录
zkCli.sh -server localhost:2181
创建文件夹
create /lock ""
create /lock/storm ""
ls  /lock
get /lock/storm/pv



6、运行PvTopo结果





猜你喜欢

转载自javafu.iteye.com/blog/2374254
今日推荐