文章目录


基于 Direct 模式消费 Kafka,并在输出每条消息时附带该消息的 Kafka 时间戳

pom.xml

<!-- Dependencies for the Spark Streaming + Kafka direct-stream example.
     Both artifacts are Scala 2.11 builds pinned to the same version (2.2.0). -->
<dependencies>
<!-- Spark Streaming integration for Kafka 0.10; provides the
     org.apache.spark.streaming.kafka010 package used by the example code. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.2.0</version>
</dependency>

<!-- Spark Streaming itself (StreamingContext, Seconds, ...). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.2.0</version>
</dependency>
</dependencies>

代码

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Spark Streaming example that consumes a Kafka topic through the direct
 * stream API and prints every message value followed by its Kafka timestamp.
 */
object DirectStream {

  /**
   * Builds the streaming context, subscribes to the topic, and for every
   * micro-batch prints the records and then commits the batch's offsets
   * back to Kafka. Blocks until the streaming context terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val consumerGroup = "myorder"
    val sourceTopic = "time"

    val sparkConf = new SparkConf()
      .setAppName("DirectStream")
      .setMaster("local[2]")
    // One micro-batch every 5 seconds.
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    val consumerConfig = Map[String, Object](
      "bootstrap.servers" -> "note01:9092,note02:9092,note03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> consumerGroup,
      "auto.offset.reset" -> "earliest",
      // Auto-commit is off: offsets are committed explicitly after each batch below.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val records = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](Array(sourceTopic), consumerConfig)
    )

    records.foreachRDD { batch =>
      // Capture the offset ranges of this batch before producing any output.
      val ranges = batch.asInstanceOf[HasOffsetRanges].offsetRanges

      batch.foreach { record =>
        println(s"${record.value()}时间戳:${record.timestamp()}")
      }

      // Commit only after the batch's records have been printed.
      records.asInstanceOf[CanCommitOffsets].commitAsync(ranges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}