The pom.xml file, set up so the project can be packaged:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>ht-spark</groupId>
    <artifactId></artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>1.6.0</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5.1</version>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
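Two things about this pom are worth noting. The spark-sql dependency is marked as provided, so it is not bundled into the jar and is expected to come from the cluster at runtime, and the shade plugin strips the signature files (META-INF/*.SF, *.DSA, *.RSA) that would otherwise make the merged jar fail verification. The jar produced by mvn package is therefore meant to be handed to spark-submit rather than run directly, e.g. something along the lines of spark-submit --class SqlDemo01 path/to/the-shaded.jar, where the class name comes from the first example below and the jar path is a placeholder.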
The main code:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object SqlDemo01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("SqlDemo01")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // read the raw text file (the path was left blank in the original)
    val lines = sc.textFile("")
    // clean up each line and wrap it in the Boy case class:
    // this turns the unstructured text into structured records
    val boyRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toLong
      val name = fields(1)
      Boy(id, name)
    })
    // the implicits provide the RDD-to-DataFrame conversion (toDF)
    import sqlContext.implicits._
    val bdf = boyRDD.toDF
    // register a temporary table so it can be queried with SQL
    bdf.registerTempTable("t_boy")
    // sql() is a transformation, so it is evaluated lazily
    val result = sqlContext.sql("select * from t_boy")
    // result.show()
    // write is an action and triggers the job (output path left blank in the original)
    result.write.format("parquet").mode("overwrite").save("")
    sc.stop()
  }
}
case class Boy(id: Long, name: String)

- Create a SparkContext first, then create a SQLContext on top of it
- Build an RDD and clean up the data, then associate it with a case class to turn the unstructured data into structured data
- Register a temporary table
- Execute the SQL; sql() is a transformation, so it is lazily evaluated
- Execute an action (show or write) to actually trigger the job; reading the saved parquet back is sketched right after this list
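To round off the action step, the parquet files written above can be read back and queried again. A minimal sketch, assuming Spark 1.6's SQLContext.read API; the object name and the output path are placeholders, not part of the original example:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object ParquetReadDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("ParquetReadDemo")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // read.parquet is also lazy; the schema is taken from the parquet footers
    val bdf = sqlContext.read.parquet("/path/to/parquet/output") // placeholder path
    bdf.registerTempTable("t_boy")
    // still only a transformation ...
    val result = sqlContext.sql("select id, name from t_boy where id > 1")
    // ... show() is the action that actually runs the job
    result.show()
    sc.stop()
  }
}

The second example builds the DataFrame from Row objects plus an explicit schema instead of a case class: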
package sql

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

object sqlDemo02 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("sqlDemo02")
    val sc = new SparkContext(conf)
    val context = new SQLContext(sc)
    val lines = sc.textFile("C:\\Users\\admin\\Desktop\\sql.txt")
    // clean up the data and wrap each record in a Row
    val rowRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0)
      val name = fields(1)
      Row(id, name)
    })
    // the result type: essentially the table header that describes the DataFrame
    val sch = StructType(
      List(
        // field name, type, nullable
        StructField("id", StringType, true),
        StructField("name", StringType)
      )
    )
    val df = context.createDataFrame(rowRDD, sch)
    df.registerTempTable("person")
    val result = context.sql("select count(*) from person")
    result.show()
    // "txt" is not a built-in data source; JSON is used here so the aggregate can be saved
    result.write.format("json").save("C:\\Users\\admin\\Desktop\\result")
    sc.stop()
  }
}
- Create a SparkContext first, then create a SQLContext on top of it
- Build an RDD and clean up the data, then associate it with Row to turn the unstructured data into structured data
- Define the schema (a quick printSchema check is sketched after this list)
- Call SQLContext's createDataFrame method
- Register a temporary table
- Execute the SQL; sql() is a transformation, so it is lazily evaluated
- Execute an action
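To verify that the StructType above is what actually describes the DataFrame, printSchema() can be called on df right after createDataFrame. A minimal sketch; the printed tree is the expected shape rather than captured output, and note that StructField defaults nullable to true when it is omitted:

// df is the DataFrame built with context.createDataFrame(rowRDD, sch) above
df.printSchema()
// root
//  |-- id: string (nullable = true)
//  |-- name: string (nullable = true)

The same job can also be written without registering a temporary table at all, using the DataFrame DSL directly: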
package sql

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

object sqlDemo02 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("sqlDemo02")
    val sc = new SparkContext(conf)
    val context = new SQLContext(sc)
    val lines = sc.textFile("C:\\Users\\admin\\Desktop\\sql.txt")
    // clean up the data and wrap each record in a Row
    val rowRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toInt
      val name = fields(1)
      Row(id, name)
    })
    // the result type: essentially the table header that describes the DataFrame
    val sch = StructType(
      List(
        // field name, type, nullable
        StructField("id", IntegerType, true),
        StructField("name", StringType)
      )
    )
    val df = context.createDataFrame(rowRDD, sch)
    // df.registerTempTable("person")
    // DSL style: no temporary table is registered
    val frame1 = df.select("id", "name")
    // val result = context.sql("select count(*) from person")
    // the implicits provide the $"column" syntax
    import context.implicits._
    val result = frame1.orderBy($"id".desc)
    result.show()
    result.write.format("json").save("C:\\Users\\admin\\Desktop\\result")
    sc.stop()
  }
}
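Besides select and orderBy, the DSL also covers aggregations through org.apache.spark.sql.functions. A minimal self-contained sketch; the object name SqlDemoAgg and the sample rows are made up for illustration, while the API calls are standard Spark 1.6 DataFrame operations:

package sql

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
import org.apache.spark.{SparkConf, SparkContext}

object SqlDemoAgg {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("SqlDemoAgg")
    val sc = new SparkContext(conf)
    val context = new SQLContext(sc)
    import context.implicits._

    // build a small DataFrame directly from a local collection via toDF
    val df = sc.parallelize(Seq((1, "tom"), (2, "jerry"), (3, "tom"))).toDF("id", "name")

    // DSL equivalent of: select name, count(*) cnt from t group by name order by cnt desc
    val counted = df.groupBy($"name").agg(count("*").as("cnt"))
    counted.orderBy($"cnt".desc).show()

    sc.stop()
  }
}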
















