storm学习篇(二)—— 单词计数实例

版权声明: https://blog.csdn.net/typ1805/article/details/81199114

利用storm实现简单的单词统计

添加依赖pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>
    <!-- groupId and version are mandatory coordinates for a POM that has no parent. -->
    <groupId>com.storm.demo</groupId>
    <artifactId>storm-wordcount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <!-- maven-compiler-plugin reads these two properties; java.version alone has no effect. -->
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.1</version>
            <!-- Default (compile) scope is needed to run with LocalCluster from the IDE.
                 Enable the line below when the jar is submitted to a real cluster,
                 which already provides storm-core on its classpath. -->
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Builds a single runnable jar containing all dependencies.
                 The plugin is not bound to a lifecycle phase here, so invoke it
                 explicitly, e.g. mvn clean compile assembly:single -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- Entry point of this project (see WordCountTopology.java). -->
                            <mainClass>com.storm.demo.WordCountTopology</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

创建SentenceSpout.java

package com.storm.demo;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Spout that feeds the word-count topology with one fixed sentence.
 *
 * <p>Every call to {@link #nextTuple()} emits the same single-field tuple. The tuple is
 * emitted without a message id, and the output field is declared under the name
 * {@code "love"}.
 *
 * @author typ
 * @since 2018/7/24 21:44
 */
public class SentenceSpout extends BaseRichSpout {

    // The only sentence this spout ever produces.
    private static final String SENTENCE = "i am love storm";

    private SpoutOutputCollector collector;

    /**
     * Initialisation hook: remembers the collector used later to emit tuples.
     *
     * @param conf    configuration handed in by Storm (unused)
     * @param context topology context handed in by Storm (unused)
     * @param out     collector through which this spout emits
     */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector out) {
        collector = out;
    }

    /**
     * Emits the fixed sentence once per invocation. The Storm framework invokes this
     * method over and over in a loop.
     */
    public void nextTuple() {
        Values sentence = new Values(SENTENCE);
        collector.emit(sentence);
    }

    /**
     * Declares the single output field, named {@code "love"}, that carries the sentence.
     *
     * @param declarer receives the output schema of this spout
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields schema = new Fields("love");
        declarer.declare(schema);
    }
}
创建SplitSentenceBolt.java
package com.storm.demo;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Bolt that breaks an incoming sentence into words.
 *
 * <p>The sentence is read from position 0 of the input tuple and split on single space
 * characters. For every non-empty token a {@code (word, 1)} tuple is emitted, declared
 * as the fields {@code "word"} and {@code "count"}.
 *
 * @author typ
 * @since 2018/7/24 21:48
 */
public class SplitSentenceBolt extends BaseRichBolt {

    private OutputCollector collector;

    /**
     * Initialisation hook: remembers the collector used later to emit tuples.
     *
     * @param map             configuration handed in by Storm (unused)
     * @param topologyContext topology context handed in by Storm (unused)
     * @param outputCollector collector through which this bolt emits
     */
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Splits the sentence carried by {@code tuple} and emits one {@code (word, 1)} tuple
     * per word. Storm invokes this method once for every incoming tuple.
     *
     * @param tuple input tuple whose first value is the sentence
     */
    public void execute(Tuple tuple) {
        String line = tuple.getString(0);
        for (String word : line.split(" ")) {
            // An empty line, a leading space or consecutive spaces make split(" ")
            // return "" tokens; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            this.collector.emit(new Values(word, 1));
        }
    }

    /**
     * Declares the two output fields: the word and its per-occurrence count.
     *
     * @param declarer receives the output schema of this bolt
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}

创建WordCountBolt.java

package com.storm.demo;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

import java.util.HashMap;
import java.util.Map;

/**
 * Terminal bolt that keeps a running total for every word it receives.
 *
 * <p>Input tuples are read by position: index 0 is the word, index 1 is the amount to
 * add. Totals live only in this bolt instance's memory, and the complete table is
 * printed to standard output after every tuple. The bolt emits nothing and declares no
 * output fields.
 *
 * @author typ
 * @since 2018/7/24 21:52
 */
public class WordCountBolt extends BaseRichBolt {

    private OutputCollector collector;

    // Running totals keyed by word.
    private final Map<String, Integer> counts = new HashMap<>();

    /**
     * Initialisation hook: remembers the collector supplied by Storm.
     *
     * @param stormConf       configuration handed in by Storm (unused)
     * @param topologyContext topology context handed in by Storm (unused)
     * @param outputCollector collector belonging to this bolt
     */
    public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Adds the tuple's increment to the total of the tuple's word and prints all totals.
     *
     * @param tuple input tuple holding the word at index 0 and the increment at index 1
     * @throws NullPointerException if the increment at index 1 is {@code null}
     */
    public void execute(Tuple tuple) {
        String word = tuple.getString(0);
        Integer num = tuple.getInteger(1);
        // merge() stores num for a new word and adds it otherwise, so the first
        // occurrence honours the incoming increment instead of a hard-coded 1.
        counts.merge(word, num, Integer::sum);
        System.out.println("count" + counts);
    }

    /**
     * Declares nothing: this bolt is the end of the stream and never emits.
     *
     * @param declarer receives the (empty) output schema of this bolt
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Intentionally empty.
    }
}

创建WordCountTopology.java

package com.storm.demo;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

/**
 * Entry point that wires the word-count topology and runs it in local mode.
 *
 * <p>Data flow: {@code SentenceSpout} -> {@code SplitSentenceBolt} -> {@code WordCountBolt}.
 *
 * @author typ
 * @since 2018/7/24 22:02
 */
public class WordCountTopology {

    /**
     * Builds the topology, requests two workers and submits it to an in-process
     * {@link LocalCluster} under the name {@code "mywordcount"}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if creating the local cluster or submitting the topology fails
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = wireComponents();

        // The Config specifies how many workers this topology asks for.
        Config config = new Config();
        config.setNumWorkers(2);

        // A topology can be submitted in two modes: remote or local.
        // 1. Remote mode (disabled here):
        //    StormSubmitter.submitTopology("mywordcount",config,builder.createTopology());
        // 2. Local mode:
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("mywordcount", config, builder.createTopology());
    }

    /**
     * Registers the spout and both bolts and connects them.
     *
     * @return a builder holding the fully wired components
     */
    private static TopologyBuilder wireComponents() {
        TopologyBuilder wiring = new TopologyBuilder();

        // One spout task produces sentences.
        wiring.setSpout("spout", new SentenceSpout(), 1);

        // Ten splitter tasks; sentences are spread over them by shuffle grouping.
        wiring.setBolt("myBolt1", new SplitSentenceBolt(), 10)
                .shuffleGrouping("spout");

        // Two counter tasks; grouping on "word" routes equal words to the same task.
        wiring.setBolt("myBolt2", new WordCountBolt(), 2)
                .fieldsGrouping("myBolt1", new Fields("word"));

        return wiring;
    }
}

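运行结果:程序启动后,控制台会不断打印以 count 开头的累计词频,形如 count{am=3, i=3}(数字随时间持续增长);由于 WordCountBolt 有两个任务并按 word 字段分组,每个任务只打印自己分到的那部分单词。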

猜你喜欢

转载自blog.csdn.net/typ1805/article/details/81199114