代码
demo01 演示把由样例类对象(User)组成的 RDD 转换为 DataFrame;demo02 演示把由基本类型元组 (String, Int) 组成的 RDD 转换为 DataFrame。
package com
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * Simple record holding a name and an age.
 *
 * @param name the user's name
 * @param age  the user's age, as an integer
 */
case class User(
  name: String,
  age: Int
)
/**
 * Small demos that convert an RDD into a DataFrame with `toDF` and print it.
 *
 * `demo01` starts from an RDD of `User` case-class instances; `demo02` starts
 * from an RDD of `(String, Int)` tuples.
 */
object RDD2DF {

  /**
   * Builds (or reuses) a local two-thread SparkSession named "RDD2DF", runs
   * `body` with it, and stops the session afterwards.
   *
   * The stop happens in a `finally` so the session is released even when
   * `body` throws.
   *
   * @param body the work to run against the session
   */
  private def withSpark(body: SparkSession => Unit): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[2]")
      .appName("RDD2DF")
      .getOrCreate()
    try body(spark)
    finally spark.stop()
  }

  /**
   * Converts an RDD of `User` objects into a DataFrame with columns
   * "name" and "age" and prints it.
   */
  def demo01(): Unit = withSpark { spark =>
    // Brings the implicit conversions that provide `toDF` on RDDs into scope.
    import spark.implicits._
    val sc: SparkContext = spark.sparkContext
    val rdd = sc.parallelize(Seq(User("lisi", 10), User("zs", 20), User("ww", 15)))
    rdd.toDF("name", "age").show()
  }

  /**
   * Converts an RDD of `(String, Int)` tuples into a DataFrame with columns
   * "name" and "age" and prints it.
   */
  def demo02(): Unit = withSpark { spark =>
    // Brings the implicit conversions that provide `toDF` on RDDs into scope.
    import spark.implicits._
    val sc: SparkContext = spark.sparkContext
    val rdd: RDD[(String, Int)] = sc.parallelize(("lisi", 10) :: ("zs", 20) :: Nil)
    // Column names must be distinct: the second tuple field is the age, and
    // naming both columns "name" makes later references to "name" ambiguous.
    rdd.toDF("name", "age").show()
  }

  /**
   * Entry point: runs the case-class demo. Swap in `demo02()` to run the
   * tuple demo instead.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    demo01() // RDD of case-class objects
    // demo02() // RDD of tuples of basic types
  }
}
demo01输出
+----+---+
|name|age|
+----+---+
|lisi| 10|
|  zs| 20|
|  ww| 15|
+----+---+
demo02 输出
+----+----+
|name|name|
+----+----+
|lisi|  10|
|  zs|  20|
+----+----+