// Example: running a Spark SQL query end to end
/**
 * Demo entry point: reads a local JSON file into a temp view and runs a SQL query.
 *
 * @param args unused command-line arguments
 */
def main(args: Array[String]): Unit = {
  // 1. Build the Spark configuration (single-threaded local master for the demo).
  val conf = new SparkConf()
    .setMaster("local")
    .setAppName("test-sql")
  // 2. Create (or reuse) the SparkSession — the entry point to Spark SQL.
  val session: SparkSession = SparkSession
    .builder()
    .config(conf)
    .getOrCreate()
  try {
    // 3. Read the local student.json file and register it as the "student" temp view.
    // Expected file contents (one JSON object per line):
    //   {"id": 1 , "name" : "Kate" , "age" : 29}
    //   {"id": 2 , "name" : "Andy" , "age" : 39}
    //   {"id": 3 , "name" : "Tony" , "age" : 10}
    session
      .read
      .json("D:\\daima\\work\\1011\\spark-test-zhonghuashishan\\src\\test\\file\\student.json")
      .createOrReplaceTempView("student")
    // 4. Query the view with SQL and print the result.
    session.sql("select name from student where age > 18 ").show()
  } finally {
    // Release the underlying SparkContext; the original version leaked the session.
    session.stop()
  }
}
/**
 * An internal row backed by a plain `Array[Any]`: every field accessor is
 * direct array indexing by ordinal.
 */
class GenericInternalRow(val values: Array[Any]) extends BaseGenericInternalRow {
  /** No-arg constructor for serialization. */
  protected def this() = this(null)

  /** Creates a row backed by a fresh array of the given size. */
  def this(size: Int) = this(new Array[Any](size))

  override def numFields: Int = values.length

  // Field lookup is a direct index into the backing array.
  override protected def genericGet(ordinal: Int) = values(ordinal)

  // Returns a defensive copy so callers cannot mutate the backing array.
  override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone()

  override def update(i: Int, value: Any): Unit = values(i) = value

  override def setNullAt(i: Int): Unit = values(i) = null
}
// Defined in the TreeNode file: tracks the source position of tree nodes.
// Position of a TreeNode within the original SQL text being parsed.
case class Origin(
  line: Option[Int] = None,           // line number in the source text, if known
  startPosition: Option[Int] = None)  // character offset within that line, if known
/**
 * Tracks the [[Origin]] (source position) of the TreeNode currently being
 * constructed. Thread-local so concurrent parsing threads each keep their
 * own position independently.
 */
object CurrentOrigin {
  private val value = new ThreadLocal[Origin]() {
    override def initialValue: Origin = Origin()
  }

  def get: Origin = value.get()

  def set(o: Origin): Unit = value.set(o)

  /** Clears the current origin back to an empty position. */
  def reset(): Unit = value.set(Origin())

  /** Records the current line and offset while parsing. */
  def setPosition(line: Int, start: Int): Unit = {
    value.set(
      value.get.copy(line = Some(line), startPosition = Some(start)))
  }

  /**
   * Runs `f` with `o` as the current origin, restoring the *previous* origin
   * afterwards. The original version reset to an empty Origin on exit, which
   * clobbered the outer context when `withOrigin` calls were nested (or when
   * a caller had already `set` an origin).
   */
  def withOrigin[A](o: Origin)(f: => A): A = {
    val previous = get
    set(o)
    try f finally set(previous)
  }
}