// See the code implementation below.

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.DataTypes
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.StructField
/**
 * Word count implemented with the Spark 1.x SQL / DataFrame API (Scala).
 *
 * Reads a text file, splits each line into words, registers the words as a
 * temporary table, and counts the occurrences of each word with a SQL query.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkContext (local mode, single core).
    val sc = new SparkContext(new SparkConf().setAppName("word").setMaster("local[1]"))
    try {
      // 2. Create the SQLContext (Spark 1.x entry point for SQL).
      val sqlContext = new SQLContext(sc)
      // 3. Load the input file as an RDD of lines.
      //    NOTE(review): the path is hard-coded and relative — assumes
      //    "dataframeword.txt" exists in the working directory.
      val lines = sc.textFile("dataframeword.txt")
      // 4. FIX: split each line into words before counting. The original
      //    mapped each whole LINE to a Row, so it counted lines, not words.
      val rows = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty).map(Row(_))
      // 5. Schema: a single nullable string column named "name".
      val field = Array(DataTypes.createStructField("name", DataTypes.StringType, true))
      // 6. Equivalent to: StructType(Array(StructField("name", StringType, true)))
      val structType = DataTypes.createStructType(field)
      // 7. Build the DataFrame from the row RDD plus the schema.
      val df = sqlContext.createDataFrame(rows, structType)
      // 8. Register the DataFrame so SQL can reference it by name.
      df.registerTempTable("t_word")
      // 9. Register a demo UDF (maps every name to the constant 1); kept from
      //    the tutorial but not needed for the count below.
      sqlContext.udf.register("str", (name: String) => 1)
      // 10. FIX: group and count entirely in SQL. The original grouped the SQL
      //     result by df.col("name") — a column bound to a DIFFERENT DataFrame,
      //     which is fragile and semantically wrong.
      sqlContext.sql("select name, count(*) as count from t_word group by name").show()
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}

// Reference: https://rklicksolutions.wordpress.com/2016/03/03/tutorial-spark-1-6-sql-and-dataframe-operations/