Storm big data learning — real-time website-visit statistics, case 35

Case 1: Real-time website visit statistics

 

Real-time stream computing framework: Storm

 

1)spout

The spout ingests the data source.

The local input file is shown below.

The spout code:

package pvcount;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

/**
 * @author Dawn
 * @date 2019年6月7日10:19:39
 * @version 1.0
 * 编写spout。接入本地数据源
 */
PvCountSpout the implements IRichSpout {class public 
	@Override
	
	Collector SpoutOutputCollector Private; 
	Private the BufferedReader br; 
	Private Line String; 
	
	@Override 
	public void nextTuple () { 
		// read the data transmitted in each row 
		the try { 
			the while (! (= br.readLine Line ()) = null) { 
				// sending data to splitbolt 
				collector.emit (new new Values (Line)); 
				// set delay 
				the Thread.sleep (500); 
			} 
		} the catch (IOException E) { 
			// the TODO Auto-Generated Block the catch 
			e.printStackTrace (); 
		} the catch (InterruptedException E) { 
			// the TODO Auto-Generated Block the catch 
			e.printStackTrace (); 
		} 
		
	} 

	public void Open (the arg0 the Map, TopologyContext arg1, SpoutOutputCollector Collector) {
		this.collector=collector;
		
		//读取文件
		try {
			br=new BufferedReader(new InputStreamReader(new FileInputStream("f:/temp/storm实时统计访问量/weblog.log")));
		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		//别关流!!!!
//		finally {
//			if(br!=null) {
//				try {
//					br.close();
//				} catch (IOException e) {
//					// TODO Auto-generated catch block
//					e.printStackTrace();
//				}
//			}
//		}
		
	}

	@Override
	void declareOutputFields public (OutputFieldsDeclarer declarer) { 
		//声明
		declarer.declare (new new Fields ( "logs")); 
	} 

	// tuple successful callback processing method. Like kafka that callback callback function, as well as zookeeper callback function Process 
	@Override 
	public void ACK (Object arg0) { 
		// TODO Auto-Generated Method, Stub 
		
	} 
	
	// If the spout this method is called failure mode is activated , and the command storm activate [topology name] same effect in Linux 
	@Override 
	public void the activate () { 
		// the TODO Auto-Generated Method Stub 
		
	} 

	// Close the program executed before the spout can not be guaranteed is performed kill -9 does not perform storm kill is not performed 
	@Override 
	public void use Close () { 
		// TODO Auto-Generated Method, Stub 
		
	} 

	// failure during the spout, nextTuple will not be called and storm deactivate the command in Linux [topology name] the same effect 
	@Override 
	public void the deactivate () { 
		// the TODO Auto-Generated method Stub 
		
	} 

	method of processing // tuple failure callback
	@Override
	public void fail(Object arg0) {
		// TODO Auto-generated method stub
		
	}

	//配置
	@Override
	public Map<String, Object> getComponentConfiguration() {
		// TODO Auto-generated method stub
		return null;
	}

}

  

2) split bolt

Business logic processing:

splits each record

and extracts the session id

pvcount Package; 

Import a java.util.Map; 

Import org.apache.storm.task.OutputCollector; 
Import org.apache.storm.task.TopologyContext; 
Import org.apache.storm.topology.IRichBolt; 
Import org.apache.storm. topology.OutputFieldsDeclarer; 
Import org.apache.storm.tuple.Fields; 
Import org.apache.storm.tuple.Tuple; 
Import org.apache.storm.tuple.Values; 

/ ** 
 * @author Dawn 
 * @date June 2019 May 7 10:30:38 
 * @version 1.0 
 * segmentation data, get the URL 
 * / 
public class PvCountSplitBolt the implements IRichBolt { 
	
	Private OutputCollector Collector; 
	Private pvnum int = 0; 
	
	// service logic distributed cluster of concurrent threads (receiving then treated tuple) 
	@Override
	void Execute public (Tuple INPUT) { 
		// get the data. 1. 
		String = input.getStringByField Line ( "logs"); 
		
		. 2 // partitioned data 
		String [] = line.split Fields ( "\ T"); 
		String session_id Fields = [. 1]; 
		
		.. 3 // local accumulation 
		IF (session_id = null!) { 
			pvnum ++; 
			// output 
			collector.emit (new new Values (Thread.currentThread () getId (), pvnum).); 
		} 
	} 

	// initialization calls 
	@Override 
	public void PREPARE (the arg0 the Map, TopologyContext arg1, OutputCollector Collector) { 
		this.collector = Collector; 
	} 

	// declare 
	@Override 
	public void declareOutputFields (OutputFieldsDeclarer declarer) { 
		// declare an output
		declarer.declare (new new Fields ( "threadid", "pvnum")); 
	} 

	calls can not guarantee resource cleanup is called when a bolt coming off // 
	@Override 
	public void Cleanup () { 
		// TODO Auto-Generated Method, Stub 
		
	} 

	// configure 
	@Override 
	public the Map <String, Object> getComponentConfiguration () { 
		// Generated Method Stub the TODO Auto- 
		return null; 
	} 
	
}

  

3)bolt

Accumulates and sums the counts

package pvcount;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;

/**
 * @author Dawn
 * @date 2019年6月7日10:39:52
 * @version 1.0
 * 累加次数求和
 */
public class PvCountSumBolt implements IRichBolt{

	private OutputCollector collector;
	private HashMap<Long, Integer> hashmap=new HashMap<>();
	
	@Override
	public void cleanup() {
		
	}

	@Override 
	public void Execute (Tuple INPUT) { 
		// get the data. 1. 
		Long the threadId = input.getLongByField ( "ThreadID"); 
		Integer pvnum = input.getIntegerByField ( "pvnum"); 
		
		. 2 // Create a set storage (threadid, pvnum) 
		hashmap.put (the threadId, pvnum); 
		
		// accumulated sum. 3 (to get the value of all values in the collection). 
		; the Iterator <Integer> = hashmap.values Iterator () Iterator (). 
		
		before // 4 emptied. data 
		int SUM = 0; 
		the while (iterator.hasNext ()) { 
			SUM Iterator.next + = (); 
		} 
		
		. System.err.println (Thread.currentThread () getName () + "totals of ->" + sum ); 
	} 

	@Override 
	public void PREPARE (the arg0 the Map, TopologyContext arg1, OutputCollector Collector) { 
		
	}
 
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		// TODO Auto-generated method stub
		return null;
	}

}

  

4)Driver

Uses fields grouping

pvcount Package; 

Import org.apache.storm.Config; 
Import org.apache.storm.LocalCluster; 
Import org.apache.storm.topology.TopologyBuilder; 
Import org.apache.storm.tuple.Fields; 

/ ** 
 * @author Dawn 
 * @date 2019 Nian 6 Yue 7 Ri 10:45:53 
 * 1.0 @version site traffic statistics (real-time statistics) 
 * / 
public class PvCountDriver { 
	public static void main (String [] args) { 
		// 1. create a topology 
		TopologyBuilder = new new TopologyBuilder Builder (); 

		// set 2. specify 
		builder.setSpout ( "pvcountspout", new new PvCountSpout (),. 1); 
		builder.setBolt ( "pvsplitbolt", new new PvCountSplitBolt (),. 6) .setNumTasks (. 4). fieldsGrouping ( "pvcountspout", 
				new new Fields ( "logs"));
		builder.setBolt("pvcountbolt", new PvCountSumBolt(), 1).fieldsGrouping("pvsplitbolt",
				new Fields("threadid", "pvnum"));

		// 3.创建配置信息
		Config conf = new Config();
		conf.setNumWorkers(2);
		
		// 4.提交任务
		LocalCluster localCluster = new LocalCluster();
		localCluster.submitTopology("pvcounttopology", conf, builder.createTopology());
	}

}

  

Results are as follows:

 

There are 190 records in total. If more data is appended after the count completes, the program keeps updating the statistics in real time.

 

You may also like

Origin www.cnblogs.com/hidamowang/p/10987864.html