package example

import org.apache.spark.util.AccumulatorV2
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable
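// Demonstrates a user-defined AccumulatorV2 that counts word occurrences
// across an RDD and is registered with the SparkContext under a name.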
object AccumulatorTest2 {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    // The accumulator must be registered with the SparkContext before it is used.
    val myAccumulator = new MyAccumulator()
    sc.register(myAccumulator, "myAccumulator")

    val myRdd = sc.makeRDD(List("hello", "world", "hello", "spark", "hi", "hi"))
    // add() runs on the executors; the driver reads the merged result via value.
    myRdd.foreach(word => myAccumulator.add(word))

    println(myAccumulator.value)
  }
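
  // Custom accumulator: IN type is String (one word per add call),
  // OUT type is mutable.Map[String, Long] mapping each word to its count.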
  class MyAccumulator extends AccumulatorV2[String, mutable.Map[String, Long]] {

    private var wordCountMap = mutable.Map[String, Long]()

    // The accumulator is at its zero value when no words have been counted yet.
    override def isZero: Boolean = wordCountMap.isEmpty

    // copy() must return a new accumulator carrying the current state,
    // not an empty one, so that existing counts are preserved.
    override def copy(): AccumulatorV2[String, mutable.Map[String, Long]] = {
      val newAcc = new MyAccumulator()
      newAcc.wordCountMap ++= wordCountMap
      newAcc
    }

    override def reset(): Unit = wordCountMap.clear()

    // Called on the executors for every element of the RDD.
    override def add(word: String): Unit = {
      wordCountMap.update(word, wordCountMap.getOrElse(word, 0L) + 1)
    }

    // Called on the driver to combine the partial maps from each partition.
    override def merge(other: AccumulatorV2[String, mutable.Map[String, Long]]): Unit = {
      other.value.foreach { case (word, count) =>
        wordCountMap.update(word, wordCountMap.getOrElse(word, 0L) + count)
      }
    }

    override def value: mutable.Map[String, Long] = wordCountMap
  }
}