文章目录
Receiver 模式
在 Linux 中启动控制台生产者
kafka-console-producer.sh --broker-list note01:9092,note02:9092,note03:9092 --topic time
具体代码
// Receiver-mode Kafka word count: consumes messages from the "time" topic via
// ZooKeeper-tracked offsets, splits each message into words, and prints
// per-batch word counts (no state kept across batches).
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local[*]").setAppName("KafkaStreaming")
// Micro-batch interval: one batch every 3 seconds.
val ssc = new StreamingContext(conf, Seconds(3))
// ZooKeeper quorum the Receiver uses for offset/consumer-group bookkeeping.
val zkHost = "192.168.18.100:2181,192.168.18.101:2181,192.168.18.102:2181"
val groupId = "g1"
// Map of topic name -> number of receiver threads for that topic.
val topic = Map[String,Int]("time" -> 1)
val data: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc,zkHost,groupId,topic)
// The ReceiverInputDStream holds (key, value) tuples: the key is the Kafka
// message key, the value is the actual payload — keep only the payload.
val lines: DStream[String] = data.map(_._2)
// Classic word count within each batch only.
lines.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).print()
ssc.start()
ssc.awaitTermination()
}基于Receiver模式可更新key状态的wordCount
/*
 * State-update function passed to updateStateByKey.
 *   1st tuple element: the aggregation key (the word itself)
 *   2nd: per-partition counts of this word produced by the current batch
 *   3rd: previously accumulated total for this word, if any
 * Returns the key paired with its new running total.
 */
val updataFunc = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
  for ((word, batchCounts, previousTotal) <- iter)
    yield (word, batchCounts.sum + previousTotal.getOrElse(0))
}
/**
 * Receiver-mode Kafka word count that maintains a running total per word
 * across batches via updateStateByKey, checkpointing state to "./ck".
 */
def main(args: Array[String]): Unit = {
  val sparkConf = new SparkConf().setMaster("local[*]").setAppName("KafkaStreaming")
  val streamingContext = new StreamingContext(sparkConf, Seconds(3))
  // ZooKeeper quorum the Receiver uses for offset/consumer-group bookkeeping.
  val zkQuorum = "192.168.18.100:2181,192.168.18.101:2181,192.168.18.102:2181"
  val consumerGroup = "g1"
  // Topic name -> number of receiver threads for that topic.
  val topics = Map[String, Int]("time" -> 1)
  // Intermediate state must be checkpointed for fault tolerance.
  streamingContext.checkpoint("./ck")
  val stream: ReceiverInputDStream[(String, String)] =
    KafkaUtils.createStream(streamingContext, zkQuorum, consumerGroup, topics)
  // Each element is (message key, payload); keep only the payload.
  val payloads: DStream[String] = stream.map(_._2)
  // updateStateByKey(updateFunc, partitioner, rememberPartitioner):
  //   updateFunc  - (Iterator[(K, Seq[V], Option[S])]) => Iterator[(K, S)]
  //   partitioner - HashPartitioner sized to the default parallelism
  //   true        - reuse the same partitioner across batches
  payloads
    .flatMap(_.split(" "))
    .map((_, 1))
    .updateStateByKey(updataFunc, new HashPartitioner(streamingContext.sparkContext.defaultParallelism), true)
    .print()
  streamingContext.start()
  streamingContext.awaitTermination()
}
存在问题:程序挂掉重启后,checkpoint 之外无法可靠恢复之前累加的中间状态,因此改用 Redis 保存中间结果进行处理
引入log4j去除冗余日志
# Global logging configuration
# Root level WARN suppresses Spark's verbose INFO/DEBUG output.
log4j.rootLogger=WARN, stdout
# Console output...
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Pattern: 5-char padded level, [thread name], message, newline.
log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n
















