package com.immooc.spark


import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object DataSetTest {

  case class Person(name: String, age: Long)

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("DataSetTest")

    // Build the SparkSession directly from the SparkConf; a separate
    // SparkContext is not needed, and creating one first would cause the
    // builder's own settings to be silently ignored.
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .getOrCreate()

    import spark.implicits._

    // Read the JSON file into an untyped DataFrame
    val df = spark.read.json("file:///usr/local/Cellar/spark-2.3.0/examples/src/main/resources/people.json")

    // Convert the DataFrame into a strongly typed Dataset[Person]
    val peopleDS = df.as[Person]

    // Fields are accessed as case class members, checked at compile time
    peopleDS.map(line => (line.name, line.age)).show()
    //peopleDS.show()

    spark.stop()
  }
}

Converting a Spark DataFrame (read from JSON) to a Dataset.

The key point of a Dataset is that it is strongly typed.
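
To make the strong typing concrete, here is a minimal sketch. It assumes the same Person case class and an existing SparkSession named spark; the rows and the name ds are made up for illustration, not taken from people.json.

// Minimal sketch of Dataset strong typing, assuming the Person case class
// above and a SparkSession named `spark`.
import spark.implicits._

val ds = Seq(Person("Andy", 30), Person("Justin", 19)).toDS()

// Typed operations: field names and types are checked at compile time,
// so a typo such as p.agee would not compile.
ds.filter(p => p.age > 20).map(p => p.name).show()

// The untyped DataFrame equivalent: a wrong column name (e.g. "agee")
// is only caught at runtime, when the query is analyzed.
ds.toDF().filter("age > 20").select("name").show()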