一、文件内容（dataframeword.txt，每行一个单词）

world
hello
www
cassie
wwwhello
www
lisi
world
cassie
hello

二、操作类

package com.day.scala.sql


import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.DataTypes
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.StructField
/**
 * Word count implemented with Spark SQL and the DataFrame API.
 *
 * Reads `dataframeword.txt`, treats every line as one word, exposes the lines
 * as the temporary table `t_word` with a single nullable string column
 * `name`, and prints how many times each distinct value occurs.
 */
object WordCount {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Create the SparkContext (local mode, one worker thread).
    val sc = new SparkContext(new SparkConf().setAppName("word").setMaster("local[1]"))
    // Everything after context creation runs inside try/finally so the
    // context is stopped even when loading or querying fails.
    try {
      // Create the SQLContext on top of the SparkContext.
      val sqlContext = new SQLContext(sc)
      // Load the input file as an RDD of lines.
      val lines = sc.textFile("dataframeword.txt")
      // Wrap every line in a single-field Row.
      val rows = lines.map(line => Row(line))
      // Describe the only column: "name", string, nullable.
      val fields = Array(DataTypes.createStructField("name", DataTypes.StringType, true))
      // Build the schema from the field list.
      // Equivalent one-liner: StructType(Array(StructField("name", StringType, true)))
      val schema = DataTypes.createStructType(fields)
      // Create the DataFrame from the rows and the schema.
      val df = sqlContext.createDataFrame(rows, schema)
      // Register the DataFrame as a temporary table so SQL can query it.
      df.registerTempTable("t_word")
      // Register a UDF that maps every name to the constant 1.
      sqlContext.udf.register("str", (name: String) => 1)
      // Run the SQL query. The column produced by str(name) is not consumed
      // by the aggregation below; only "name" is grouped and counted.
      val selected = sqlContext.sql("select name,str(name) from t_word ")
      // Group by column name so the reference is resolved against the query
      // result itself rather than against a column object of another DataFrame.
      selected.groupBy("name").count().show()
    } finally {
      // Release Spark resources.
      sc.stop()
    }
  }

}

三、结果

+--------+-----+
|    name|count|
+--------+-----+
|   hello|    2|
|  cassie|    2|
|wwwhello|    1|
|    lisi|    1|
|   world|    2|
|     www|    2|
+--------+-----+