union 并集

		// union demo: combines two RDDs of the same element type, then prints the
		// partition count of the result followed by every element.
		SparkConf conf = new SparkConf();
		conf.setMaster("local").setAppName("union");
		JavaSparkContext sc = new JavaSparkContext(conf);
		try {
			// rdd1 is created with 3 partitions, rdd2 with 2.
			JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1, 2, 3), 3);
			JavaRDD<Integer> rdd2 = sc.parallelize(Arrays.asList(4, 5, 6), 2);
			JavaRDD<Integer> union = rdd1.union(rdd2);
			// For this input the recorded output is 5, i.e. 3 + 2 partitions.
			System.out.println("union.partitions().size()---" + union.partitions().size());
			union.foreach(new VoidFunction<Integer>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void call(Integer t) throws Exception {
					System.out.println(t);
				}
			});
		} finally {
			// Stop the context even if the job above throws, so it is never left running.
			sc.stop();
		}

结果

union.partitions().size()---5
1
2
3
4
5
6

intersection 交集

注意：参与运算的两个RDD的元素类型必须一致

	// intersection demo: keeps only the elements present in both RDDs.
	// Both RDDs must have the same element type (String here).
	SparkConf conf = new SparkConf();
	conf.setMaster("local").setAppName("intersection");
	JavaSparkContext sc = new JavaSparkContext(conf);
	try {
		JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("a", "b", "c"));
		JavaRDD<String> rdd2 = sc.parallelize(Arrays.asList("a", "e", "f"));
		// "a" is the only value that appears in both inputs.
		JavaRDD<String> intersection = rdd1.intersection(rdd2);
		// Print the partition count of the result, then each element.
		System.out.println(intersection.partitions().size());
		intersection.foreach(new VoidFunction<String>() {
			private static final long serialVersionUID = 1L;

			@Override
			public void call(String t) throws Exception {
				System.out.println(t);
			}
		});
	} finally {
		// Stop the context even if the job above throws, so it is never left running.
		sc.stop();
	}
		

subtract 差集

	// subtract demo: a.subtract(b) keeps the elements of a that are not in b.
	// Both RDDs must have the same element type.
	SparkConf conf = new SparkConf();
	conf.setMaster("local").setAppName("subtract");
	JavaSparkContext sc = new JavaSparkContext(conf);
	try {
		JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("a", "b", "c"));
		JavaRDD<String> rdd2 = sc.parallelize(Arrays.asList("a", "e", "f"));
		// The operation is not symmetric. Alternative direction (the "first result"
		// in the notes), elements of rdd1 that are absent from rdd2:
		//     JavaRDD<String> subtract = rdd1.subtract(rdd2);
		// Active direction (the "second result"), elements of rdd2 absent from rdd1:
		JavaRDD<String> subtract = rdd2.subtract(rdd1);
		subtract.foreach(new VoidFunction<String>() {
			private static final long serialVersionUID = 1L;

			@Override
			public void call(String t) throws Exception {
				System.out.println(t);
			}
		});
	} finally {
		// Stop the context even if the job above throws, so it is never left running.
		sc.stop();
	}

结果

第一个结果（rdd1.subtract(rdd2)，即代码中被注释掉的写法）
b
c
第二个结果（rdd2.subtract(rdd1)）
e
f