Kafka acts as the producer side here: the source data is read from a JSON file and sent to Kafka, a Spark Streaming job consumes it, and the aggregated results are saved to Redis. Parsing the JSON requires Alibaba's fastjson package.
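The producer that feeds Kafka isn't shown in this post. Below is a minimal sketch of what it could look like, assuming one JSON record per line in a placeholder file /data/cmcc.json, the topic "m" and broker list from section 2, and the kafka-clients artifact on the classpath:
import java.util.Properties
import scala.io.Source
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object JsonFileProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "192.168.146.131:9092,192.168.146.132:9092,192.168.146.133:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)
    // Send each line of the JSON file as one Kafka message; the path is a placeholder
    Source.fromFile("/data/cmcc.json").getLines().foreach { line =>
      producer.send(new ProducerRecord[String, String]("m", line))
    }
    producer.close()
  }
}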
1. Setting up the pom file
<dependency>
    <groupId>redis.clients</groupId>
    <artifactId>jedis</artifactId>
    <version>2.9.0</version>
</dependency>
<dependency>
    <groupId>com.typesafe</groupId>
    <artifactId>config</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc_2.11</artifactId>
    <version>2.5.0</version>
</dependency>
<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc-core_2.11</artifactId>
    <version>2.5.0</version>
</dependency>
<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc-config_2.11</artifactId>
    <version>2.5.0</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.36</version>
</dependency>
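Note that the streaming code in section 2 also needs the Spark Streaming core and the Kafka 0.8 connector artifacts, which the list above omits. The versions below are an assumption and must match your Spark installation:
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>1.6.3</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka_2.11</artifactId>
    <version>1.6.3</version>
</dependency>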
2. The Kafka direct-connect API
import com.alibaba.fastjson.JSON
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaUtils, OffsetRange}
import org.apache.spark.streaming.{Duration, StreamingContext}
/**
 * Direct (receiver-less) connection: Spark Streaming tasks read straight from the Kafka partitions.
 */
object KafkaDirectLink {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Direct").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Duration(5000))
    // Consumer group name
    val groupId = "gp01"
    // Topic to consume
    val topic = "m"
    // Kafka broker list (the tasks use the low-level consumer API against the partitions directly)
    val brokerList = "192.168.146.131:9092,192.168.146.132:9092,192.168.146.133:9092"
    // We maintain the offsets ourselves and keep them in ZooKeeper
    val zkQuorum = "192.168.146.131:2181,192.168.146.132:2181,192.168.146.133:2181"
    // Topic set used when creating the stream; Spark Streaming can consume several topics at once
    val topics: Set[String] = Set(topic)
    // ZKGroupTopicDirs points at the ZooKeeper directory where this group's offsets live
    val topicDirs = new ZKGroupTopicDirs(groupId, topic)
    // The offset path, e.g. "/consumers/gp01/offsets/m"
    val zkTopicPath = s"${topicDirs.consumerOffsetDir}"
    // Kafka parameters
    val kafkas = Map(
      "metadata.broker.list" -> brokerList,
      "group.id" -> groupId,
      // Start from the earliest offset when none is stored
      "auto.offset.reset" -> kafka.api.OffsetRequest.SmallestTimeString
    )
    // ZooKeeper client used to read and update the stored offsets
    val zkClient = new ZkClient(zkQuorum)
    // Number of children of the offset path, one per partition, e.g.
    //   /consumers/gp01/offsets/m/0 -> 10001
    //   /consumers/gp01/offsets/m/1 -> 20001
    //   /consumers/gp01/offsets/m/2 -> 30001
    val clientOffset = zkClient.countChildren(zkTopicPath)
    // The Kafka stream about to be created
    var kafkaStream: InputDStream[(String, String)] = null
    // If ZooKeeper already holds offsets, they become the starting positions of the stream
    var fromOffsets: Map[TopicAndPartition, Long] = Map()
    // Offsets were saved before
    if (clientOffset > 0) {
      // clientOffset is the number of children of the offset path, i.e. the partition count
      for (i <- 0 until clientOffset) {
        // e.g. /consumers/gp01/offsets/m/0 -> 10001
        val partitionOffset = zkClient.readData[String](s"$zkTopicPath/${i}")
        val tp = TopicAndPartition(topic, i)
        // Record each partition's stored offset, e.g. partition 0 -> 10001
        fromOffsets += (tp -> partitionOffset.toLong)
      }
      // The key is the Kafka message key and the value is the payload;
      // the handler turns every message into a (key, message) tuple
      val messageHandler = (mmd: MessageAndMetadata[String, String]) =>
        (mmd.key(), mmd.message())
      // Create the direct DStream via KafkaUtils.
      // Type parameters: key type, value type, key decoder, value decoder, record type
      kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](
        ssc, kafkas, fromOffsets, messageHandler)
    } else {
      // No saved offsets: fall back to the earliest/latest offset according to "auto.offset.reset"
      kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc, kafkas, topics)
    }
    // Offset ranges of the current batch
    var offsetRanges = Array[OffsetRange]()
    // Iterate over each RDD in the DStream; foreachRDD is an action, so it triggers the computation
    kafkaStream.foreachRDD { kafkardd =>
      // Grab the offset ranges from the Kafka RDD before any transformation discards them
      offsetRanges = kafkardd.asInstanceOf[HasOffsetRanges].offsetRanges
      // Business logic: parse each message value into a JSON object
      val value = kafkardd.map(t => JSON.parseObject(t._2))
      IndexStatistics.getSum(value)
      // IndexStatistics.sumFail(value) // failure statistics, not shown in this post
      for (o <- offsetRanges) {
        // e.g. /consumers/gp01/offsets/m/0
        val zkpath = s"${topicDirs.consumerOffsetDir}/${o.partition}"
        // Write this partition's end offset back to ZooKeeper, e.g. .../0 -> 88889
        ZkUtils.updatePersistentPath(zkClient, zkpath, o.untilOffset.toString)
      }
    }
    // Start the job and wait
    ssc.start()
    ssc.awaitTermination()
  }
}
3. Redis connection pool
package day04.ChinaMobil
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
/**
 * Redis connection pool
 */
object JedisConnectionPool {
  private val config = new JedisPoolConfig()
  // Maximum number of idle connections
  config.setMaxIdle(20)
  // Maximum number of connections in the pool
  config.setMaxTotal(20)
  // The pool needs the Redis host and port; a timeout and a password can also be passed.
  // Built once per JVM so that every caller shares the same pool.
  private val pool = new JedisPool(config, "192.168.146.131", 6379)
  def getConnection(): Jedis = {
    // Borrow a connection from the pool
    pool.getResource
  }
}
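A quick smoke test for the pool (key and value are throwaway examples):
object JedisConnectionPoolTest {
  def main(args: Array[String]): Unit = {
    val jedis = JedisConnectionPool.getConnection()
    jedis.set("pool-test", "ok") // write a sample key
    println(jedis.get("pool-test")) // prints "ok"
    jedis.close() // returns the connection to the pool
  }
}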
4. Reading the JSON data and saving the statistics to Redis
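Judging from the fields accessed in the code below, the JSON records look roughly like this (every value is a made-up placeholder):
{
  "serviceName": "reChargeNotifyReq",
  "bussinessRst": "0000",
  "chargefee": "10000",
  "requestId": "20170412030013264156954977866",
  "receiveNotifyTime": "20170412030017"
}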
import com.alibaba.fastjson.JSONObject
import day04.Utils
import day04.ChinaMobil.JedisConnectionPool
import org.apache.spark.rdd.RDD
// Network-wide statistics: recharge order count, recharge amount, number of successful recharges, and average recharge duration.
object IndexStatistics {
  def getSum(dataBase: RDD[JSONObject]): Unit = {
    val database1: RDD[(String, String, String, List[Double])] =
      dataBase.filter(_.getString("serviceName")
        .equals("reChargeNotifyReq"))
        .map(t => {
          // Business result code of the recharge request
          val result = t.getString("bussinessRst")
          // "0000" means success; in that case take the recharge amount
          val money = if (result.equals("0000"))
            t.getString("chargefee").toDouble else 0
          // 1 for a successful recharge, 0 otherwise; summed up later
          val isSucc = if (result.equals("0000")) 1 else 0
          // Recharge start time (encoded at the beginning of requestId)
          val startTime = t.getString("requestId")
          // Recharge end time
          val stopTime = t.getString("receiveNotifyTime")
          val costTime = if (result.equals("0000"))
            Utils.costtime(startTime, stopTime) else 0
          // Keys at day, hour and minute granularity, plus the metrics
          (startTime.substring(0, 8), // per day
            startTime.substring(0, 10), // per hour
            startTime.substring(0, 12), // per minute
            List[Double](1, money, isSucc, costTime))
        })
    // Keep only the day key and the metric list
    val re: RDD[(String, List[Double])] = database1.map(t => (t._1, t._4))
    // reduceByKey aggregates the values: add the two metric lists element by element
    val re1: RDD[(String, List[Double])] = re.reduceByKey((list1, list2) => {
      list1.zip(list2).map(t => t._1 + t._2)
    })
    re1.foreachPartition(t => {
      // One pooled Redis connection per partition
      val jedis = JedisConnectionPool.getConnection()
      t.foreach(t => {
        // Total number of recharge orders
        jedis.hincrBy(t._1, "total", t._2(0).toLong)
        // Total recharge amount
        jedis.hincrByFloat(t._1, "money", t._2(1))
        // Total number of successful recharges
        jedis.hincrBy(t._1, "successSum", t._2(2).toLong)
        // Total recharge duration (divide by successSum for the average)
        jedis.hincrBy(t._1, "sumTime", t._2(3).toLong)
      })
      jedis.close()
    })
  }
}
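Utils.costtime is used above but not shown in the post. A minimal sketch, under the assumption that requestId starts with a yyyyMMddHHmmssSSS timestamp and receiveNotifyTime is a yyyyMMddHHmmss string, returning the elapsed milliseconds:
import java.text.SimpleDateFormat

object Utils {
  // Assumed formats: startTime is prefixed with "yyyyMMddHHmmssSSS",
  // stopTime is "yyyyMMddHHmmss"; returns the elapsed milliseconds.
  def costtime(startTime: String, stopTime: String): Double = {
    val startFmt = new SimpleDateFormat("yyyyMMddHHmmssSSS")
    val stopFmt = new SimpleDateFormat("yyyyMMddHHmmss")
    val start = startFmt.parse(startTime.substring(0, 17)).getTime
    val stop = stopFmt.parse(stopTime).getTime
    (stop - start).toDouble
  }
}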