-  需求:实时统计发射到 Storm 框架中的单词总数
-  分析设计一个topology,来实现对文档里面的单词出现的频率进行统计,整个topology分为三个部分 (1)WordCountSpot:数据源,在已知的英文句子中,随机发送一条句子出去 package storm.wordcount; import org.apache.storm.spout.SpoutOutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichSpout; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Values; import java.util.Map; //发送一条语句 public class WordCountSpout extends BaseRichSpout { private SpoutOutputCollector collector; @Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { this.collector = collector; } @Override public void nextTuple() { //发送数据 collector.emit(new Values("shnad zhang1 zhsndga1 dasd a a b b c dd d dd")); //延时0.5 s try { Thread.sleep(500); } catch (InterruptedException e) { e.printStackTrace(); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("love")); } }(2)WordCountSplitBolt:负责将单行文本记录(句子),切分成单词 package storm.wordcount; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Tuple; import org.apache.storm.tuple.Values; import java.util.Map; public class WordCountSplitBolt extends BaseRichBolt{ private OutputCollector collector; @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; } @Override //接受数据 public void execute(Tuple input) { //1. 
获取数据 String line = input.getString(0); //2 截取数据 String[] splits = line.split(" "); //3 发送出去 for (String word : splits) { collector.emit(new Values(word,1)); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { //声明字段 declarer.declare(new Fields("word", "num")); } }(3)WordCountBolt:负责对单词的频率进行累加 package storm.wordcount; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Tuple; import java.util.HashMap; import java.util.Map; public class WordCountBolt extends BaseRichBolt { //单词为key,单词出现的次数为value private Map<String, Integer> map = new HashMap<>(); private OutputCollector collector; @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; } @Override public void execute(Tuple input) { //1 获取传递过来的数据 String word = input.getString(0); Integer num = input.getInteger(1); //2 业务逻辑 if (map.containsKey(word)) { //如果之前统计过有单词的个数,获取个数 Integer count = map.get(word); map.put(word, count + num); } else { map.put(word, num); } // 3 控制台打印 System.err.println(Thread.currentThread().getId() + " word : " + word + " num: " + map.get(word)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { } }(4)WordCountMain驱动  1.创建拓扑对象 2.设置spout 3.配置worker开启个数 4. 
提交 package storm.wordcount; import org.apache.storm.Config; import org.apache.storm.LocalCluster; import org.apache.storm.StormSubmitter; import org.apache.storm.topology.TopologyBuilder; import org.apache.storm.tuple.Fields; public class WordCountMain { public static void main (String[] args){ //1 创建拓扑 TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("WordCountSpout",new WordCountSpout(),1); builder.setBolt("WordCountSplitBolt",new WordCountSplitBolt(),2).shuffleGrouping("WordCountSpout"); builder.setBolt("WordCountBolt",new WordCountBolt(),4).fieldsGrouping("WordCountSplitBolt",new Fields("word")); //2 创建配置信息 Config conf = new Config(); conf.setNumWorkers(2); //3 提交 if (args.length > 0){ try { StormSubmitter.submitTopology(args[0],conf,builder.createTopology()); } catch (Exception e) { e.printStackTrace(); } }else { LocalCluster cluster = new LocalCluster(); cluster.submitTopology("wordtopology",conf,builder.createTopology()); } } }
Storm实现单词统计案例
原创wx5ba7ab4695f27 ©著作权
             ©著作权归作者所有:来自51CTO博客作者wx5ba7ab4695f27的原创作品,请联系作者获取转载授权,否则将追究法律责任        
            上一篇:Storm实时计算网站pv
下一篇:storm的并发度
 
            
        
                提问和评论都可以,用心的回复会被更多人看到
                评论
            
            
                    发布评论
                
            
            相关文章
        
        
- 
                    Kafka:Streams实现单词统计测试代码pom.xml:<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org kafka zookeeper 分布式 apache java
- 
                    mapreduce实现统计单词Hello you Hello me1.1 读取hdfs中的文件。每一行解析成一个<k,v>。每一个键值对调用一次map函数解析成2个<k,v>,分别是<0,hello you> <10,hello me>调用2次map函数1.2覆盖map()函数,接受1.1的< mapreduce实现统计单词 mapreduce 原理 hadoop apache Text
 
 
                    













 
                    

 
                 
                    