foreachPartition: an action that iterates over the data one partition at a time. The function receives an iterator over all elements of a partition rather than a single element, which makes it the right place for per-partition setup and teardown work, such as opening a database connection once per partition instead of once per record.
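For contrast with foreach, which calls its function once per element, here is a minimal sketch (Scala, assuming an existing SparkContext named sc; sc is not part of the examples below):

// foreach: the function runs once per element (4 calls here)
sc.parallelize(Seq("a", "b", "c", "d"), 2).foreach(x => println(x))
// foreachPartition: the function runs once per partition (2 calls here),
// receiving an Iterator over that partition's elements
sc.parallelize(Seq("a", "b", "c", "d"), 2).foreachPartition(it => println(it.mkString(",")))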

  1. Java
package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * @Author yqq
 * @Date 2021/12/10 11:17
 * @Version 1.0
 */
public class ForeachPartitionTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("ForeachPartition")
        );
        context.setLogLevel("ERROR");
        context.parallelize(Arrays.asList("a", "b", "c", "d"), 2)
                .foreachPartition(e -> { // e: an iterator over all elements of one partition
                    List<String> list = new ArrayList<>();
                    // The println calls simulate a per-partition connection lifecycle.
                    System.out.println("open database connection");
                    while (e.hasNext())
                        list.add(e.next());
                    System.out.println("insert into database: " + list);
                    System.out.println("close database connection");
                });
        context.stop();
    }
}

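With master local and two partitions, parallelize splits the four elements evenly, so the run should print the connection lifecycle once per partition, roughly:

open database connection
insert into database: [a, b]
close database connection
open database connection
insert into database: [c, d]
close database connection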
  2. Scala

package action

import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ListBuffer

/**
 * @Author yqq
 * @Date 2021/12/10 11:29
 * @Version 1.0
 */
object ForeachPartitionTest {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("ForeachPartition")
    )
    context.setLogLevel("ERROR")
    context.parallelize(Array[String]("a", "b", "c", "d"), 2)
      .foreachPartition(e => { // e: Iterator[String] over one partition
        val buffer = new ListBuffer[String]()
        println("open database connection")
        while (e.hasNext)
          buffer.append(e.next())
        println("insert into database: " + buffer)
        println("close database connection")
      })
    context.stop()
  }
}

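In real jobs, the println calls above stand in for actual connection work. A hedged sketch of what that typically looks like with plain JDBC (the URL, credentials, and table t_word are illustrative assumptions, not from the examples above):

import java.sql.DriverManager

import org.apache.spark.{SparkConf, SparkContext}

object ForeachPartitionJdbcSketch {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf().setMaster("local").setAppName("ForeachPartitionJdbc"))
    context.setLogLevel("ERROR")
    context.parallelize(Array("a", "b", "c", "d"), 2)
      .foreachPartition { it =>
        // One connection per partition, opened on the executor.
        val conn = DriverManager.getConnection(
          "jdbc:mysql://localhost:3306/test", "user", "password") // hypothetical URL/credentials
        val stmt = conn.prepareStatement("INSERT INTO t_word(word) VALUES (?)") // hypothetical table
        try {
          while (it.hasNext) {
            stmt.setString(1, it.next())
            stmt.addBatch() // accumulate inserts, flush once per partition
          }
          stmt.executeBatch()
        } finally {
          stmt.close()
          conn.close()
        }
      }
    context.stop()
  }
}

The key design point is that the connection and prepared statement are created inside foreachPartition, so they live entirely on the executor; creating them on the driver would fail, because JDBC connections are not serializable.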