Storm 流计算实现wordcount(单词统计)

设置topology的并发度和任务
    配置并发度.
    1.设置worker数量
        conf.setNumWorkers(1);

    2.设置executors个数
        //设置Spout的并发暗示 (executor个数)
        builder.setSpout("wcspout", new WordCountSpout(),3);

        //设置bolt的并发暗示
        builder.setBolt("split-bolt", new SplitBolt(),4);

    3.设置task个数
        每个线程可以执行多个task.
        builder.setSpout("wcspout", new WordCountSpout(),3).setNumTasks(2);
        //
        builder.setBolt("split-bolt", new SplitBolt(),4).shuffleGrouping("wcspout").setNumTasks(3);

    4.并发度 = 所有task个数的总和。
        

使用storm流计算实现wordcount

WordCountSpout 类:

package com.mao.storm.wordCount;

import com.mao.storm.util.Util;
import javafx.scene.paint.RadialGradient;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;

public class WordCountSpout implements IRichSpout {

    private TopologyContext context;
    private SpoutOutputCollector collector;
    List<String> stats;
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        Util.sendToClient(this,"open()");
        this.collector = collector;
        this.context = context;
        stats = new ArrayList<String>();
        stats.add("hollo world tom");
        stats.add("hollo world tom1");
        stats.add("hollo world tom2");
    }
    private Random r = new Random();

    public void close() {

    }

    public void activate() {

    }

    public void deactivate() {

    }

    public void nextTuple() {
        Util.sendToClient(this,"nextTuple()");
        String line = stats.get(r.nextInt(3));
        collector.emit(new Values(line));
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void ack(Object o) {

    }

    public void fail(Object o) {

    }

    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("line"));
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

SplitBolt 类:

package com.mao.storm.wordCount;

import com.mao.storm.util.Util;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

public class SplitBolt implements IRichBolt {

    private TopologyContext context;
    private OutputCollector collector;

    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        Util.sendToClient(this,"prepare()");
        this.context = context;
        this.collector = collector;
    }

    public void execute(Tuple tuple) {
        Util.sendToClient(this,"execute()");
        String line = tuple.getString(0);
        String[] arr = line.split(" ");
        for (String s : arr){
            collector.emit(new Values(s,1));
        }
    }

    public void cleanup() {

    }

    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word","count"));
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordCountBolt 类:
package com.mao.storm.wordCount;

import com.mao.storm.util.Util;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;

import java.util.HashMap;
import java.util.Map;

public class WordCountBold implements IRichBolt {
    private TopologyContext context;
    private OutputCollector collector;
    private Map<String,Integer> map1;

    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        Util.sendToClient(this,"prepare()");
        this.context = context;
        this.collector = collector;
        map1 = new HashMap<String, Integer>();
    }


    public void execute(Tuple tuple) {
        Util.sendToClient(this,"execute("+tuple.toString()+")");
        String word = tuple.getString(0);
        Integer count = tuple.getInteger(1);
        if (!map1.containsKey(word)){
            map1.put(word,1);
        }else {
            map1.put(word,map1.get(word) + count);
        }
    }

    public void cleanup() {
        for (Map.Entry<String,Integer> entry : map1.entrySet()){
            System.out.println("wordCountNums:"+entry.getKey()+" : "+entry.getValue());
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

APP:

package com.mao.storm.wordCount;


import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

/**
 * Entry point that wires the word-count topology together and submits it via
 * {@link StormSubmitter}.
 */
public class App {

    /**
     * Builds the spout -> split bolt -> count bolt pipeline and submits it under the
     * name {@code "wordCount"}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if topology submission fails
     */
    public static void main(String[] args) throws Exception {
        final String spoutId = "wcSpout";
        final String splitBoltId = "split-bolt";
        final String countBoltId = "count-Bolt";

        TopologyBuilder topology = new TopologyBuilder();

        // Sentence source: parallelism hint 3, 3 tasks.
        topology.setSpout(spoutId, new WordCountSpout(), 3)
                .setNumTasks(3);

        // Sentence splitter: parallelism hint 4, 4 tasks, fed by shuffle grouping.
        topology.setBolt(splitBoltId, new SplitBolt(), 4)
                .shuffleGrouping(spoutId)
                .setNumTasks(4);

        // Word counter: parallelism hint 5, 5 tasks, grouped on the "word" field.
        topology.setBolt(countBoltId, new WordCountBold(), 5)
                .fieldsGrouping(splitBoltId, new Fields("word"))
                .setNumTasks(5);

        Config stormConf = new Config();
        stormConf.setNumWorkers(2);
        stormConf.setDebug(true);

        StormSubmitter.submitTopology("wordCount", stormConf, topology.createTopology());
    }
}

 

猜你喜欢

转载自blog.csdn.net/mao502010435/article/details/89514605