Case 1: Website traffic statistics (real-time)
Real-time stream processing framework: Storm
1)spout
Data source: reads from and connects to the data source
The local log file is as follows
Write the spout program:
package pvcount;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

/**
 * Spout that reads a local web-log file line by line and emits each line
 * as a one-field ("logs") tuple, throttled to one tuple every 500 ms.
 *
 * @author Dawn
 * @version 1.0
 */
public class PvCountSpout implements IRichSpout {

    private SpoutOutputCollector collector;
    private BufferedReader br;
    private String line;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        // Open the local log file once; nextTuple() reads from it incrementally.
        try {
            br = new BufferedReader(new InputStreamReader(
                    new FileInputStream("f:/temp/storm实时统计访问量/weblog.log")));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        // Do NOT close the reader here — nextTuple() still needs it open.
    }

    @Override
    public void nextTuple() {
        // Forward every line of the log to the split bolt.
        try {
            while ((line = br.readLine()) != null) {
                collector.emit(new Values(line));
                Thread.sleep(500); // throttle to simulate a live stream
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare the single output field consumed by the split bolt.
        declarer.declare(new Fields("logs"));
    }

    /** Callback when a tuple is processed successfully (akin to Kafka/ZooKeeper callbacks). */
    @Override
    public void ack(Object msgId) {
    }

    /** Callback when a tuple fails processing. */
    @Override
    public void fail(Object msgId) {
    }

    /** Called when the spout is activated — same effect as `storm activate [topology]`. */
    @Override
    public void activate() {
    }

    /** Called when deactivated; nextTuple() stops — same as `storm deactivate [topology]`. */
    @Override
    public void deactivate() {
    }

    /** Called before shutdown; not guaranteed to run (kill -9 skips it). */
    @Override
    public void close() {
    }

    /** Component-level configuration; none needed here. */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
2) split bolt
Business logic processing
Splits each log line
to extract the session id and count page views
package pvcount;

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Bolt that splits each raw log line and keeps a per-executor running
 * page-view count. Emits (threadid, pvnum) so the downstream sum bolt
 * can aggregate the totals across all executor threads.
 *
 * @author Dawn
 * @version 1.0
 */
public class PvCountSplitBolt implements IRichBolt {

    private OutputCollector collector;
    // Running page-view count local to this executor instance.
    private int pvnum = 0;

    /** Initialization callback: capture the collector. */
    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /** Business logic, invoked once per incoming tuple. */
    @Override
    public void execute(Tuple input) {
        // 1. Fetch the raw line emitted by the spout.
        String line = input.getStringByField("logs");
        // 2. Split on tab; column 1 is assumed to hold the session id —
        //    TODO confirm against the actual weblog format.
        String[] fields = line.split("\t");
        // Guard against malformed lines with no tab (original code would
        // throw ArrayIndexOutOfBoundsException here).
        if (fields.length > 1 && fields[1] != null) {
            // 3. Accumulate locally and emit this thread's running total.
            pvnum++;
            collector.emit(new Values(Thread.currentThread().getId(), pvnum));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare the two output fields consumed by the sum bolt.
        declarer.declare(new Fields("threadid", "pvnum"));
    }

    /** Called when the bolt goes offline; not guaranteed to run (kill -9 skips it). */
    @Override
    public void cleanup() {
    }

    /** Component-level configuration; none needed here. */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
3)bolt
Accumulates and sums the counts
package pvcount;

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;

/**
 * Terminal bolt: receives (threadid, pvnum) running totals from the split
 * bolt, keeps the latest total per upstream thread, and prints the global
 * sum after every update.
 *
 * @author Dawn
 * @version 1.0
 */
public class PvCountSumBolt implements IRichBolt {

    // Latest running count reported by each upstream executor thread.
    // Overwriting (not adding) per key avoids double-counting.
    private HashMap<Long, Integer> hashmap = new HashMap<>();

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        // No collector needed: this bolt only prints, it emits nothing.
    }

    @Override
    public void execute(Tuple input) {
        // 1. Get the upstream thread id and its running count.
        long threadId = input.getLongByField("threadid");
        Integer pvnum = input.getIntegerByField("pvnum");
        // 2. Store (threadid -> pvnum), replacing the previous value.
        hashmap.put(threadId, pvnum);
        // 3. Sum the latest value of every thread to get the global total.
        int sum = 0;
        for (Integer count : hashmap.values()) {
            sum += count;
        }
        System.err.println(Thread.currentThread().getName() + "totals of ->" + sum);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: declares no output fields.
    }

    /** Called on shutdown; not guaranteed to run. */
    @Override
    public void cleanup() {
    }

    /** Component-level configuration; none needed here. */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
4)Driver
Using field grouping
pvcount Package; Import org.apache.storm.Config; Import org.apache.storm.LocalCluster; Import org.apache.storm.topology.TopologyBuilder; Import org.apache.storm.tuple.Fields; / ** * @author Dawn * @date 2019 Nian 6 Yue 7 Ri 10:45:53 * 1.0 @version site traffic statistics (real-time statistics) * / public class PvCountDriver { public static void main (String [] args) { // 1. create a topology TopologyBuilder = new new TopologyBuilder Builder (); // set 2. specify builder.setSpout ( "pvcountspout", new new PvCountSpout (),. 1); builder.setBolt ( "pvsplitbolt", new new PvCountSplitBolt (),. 6) .setNumTasks (. 4). fieldsGrouping ( "pvcountspout", new new Fields ( "logs")); builder.setBolt("pvcountbolt", new PvCountSumBolt(), 1).fieldsGrouping("pvsplitbolt", new Fields("threadid", "pvnum")); // 3.创建配置信息 Config conf = new Config(); conf.setNumWorkers(2); // 4.提交任务 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("pvcounttopology", conf, builder.createTopology()); } }
Results are as follows:
A total of 190 records were counted. If more data is appended after the count completes, the program keeps counting in real time.