First, let's look at the SPARK_HOME/bin/spark-shell script. Its contents:
function main() {
  if $cygwin; then
    stty -icanon min 1 -echo > /dev/null 2>&1
    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
    stty icanon echo > /dev/null 2>&1
  else
    export SPARK_SUBMIT_OPTS
    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
  fi
}
As we can see, org.apache.spark.repl.Main is used as the entry point.
1. Examining the repl.Main source code, the main points are as follows.
1.1 Main defines the fields conf, sparkContext, sparkSession, and interp (a SparkILoop), plus the private flag hasErrors = false used to record option-parsing errors. The code is as follows:
object Main extends Logging {
  val conf = new SparkConf()
  var sparkContext: SparkContext = _
  var sparkSession: SparkSession = _
  // this is a public var because tests reset it.
  var interp: SparkILoop = _
  private var hasErrors = false

  private def scalaOptionError(msg: String): Unit = {
    hasErrors = true
    Console.err.println(msg)
  }

  def main(args: Array[String]) {
    doMain(args, new SparkILoop)
  }
}
class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) extends ILoop(in0, out) {
  def this(in0: BufferedReader, out: JPrintWriter) = this(Some(in0), out)

  def initializeSpark() {
    intp.beQuietDuring {
      // Create the sparkSession. The @transient modifier marks a member that the
      // serialization subsystem should ignore.
      // processLine is inherited from the parent class ILoop.
      processLine("""
        @transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) {
            org.apache.spark.repl.Main.sparkSession
          } else {
            org.apache.spark.repl.Main.createSparkSession()
          }
        // create sc from the session
        @transient val sc = {
          val _sc = spark.sparkContext
          _sc.uiWebUrl.foreach(webUrl => println(s"Spark context Web UI available at ${webUrl}"))
          println("Spark context available as 'sc' " +
            s"(master = ${_sc.master}, app id = ${_sc.applicationId}).")
          println("Spark session available as 'spark'.")
          _sc
        }
        """)
      processLine("import org.apache.spark.SparkContext._")
      processLine("import spark.implicits._")
      processLine("import spark.sql")
      processLine("import org.apache.spark.sql.functions._")
      replayCommandStack = Nil // remove above commands from session history.
    }
  }

  /** Add repl commands that need to be blocked. e.g. reset */
  private val blockedCommands = Set[String]()

  /** Standard commands */
  lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] =
    standardCommands.filter(cmd => !blockedCommands(cmd.name))

  /** Available commands */
  override def commands: List[LoopCommand] = sparkStandardCommands
}
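The command filtering above is a plain Set lookup. A self-contained stand-in sketch (Cmd is a simplified re-creation for illustration, not the real LoopCommand type):

// stand-in command type, just enough to show the filtering
case class Cmd(name: String, help: String)

val standardCommands = List(Cmd("help", "print this summary"), Cmd("reset", "reset the repl"))
val blockedCommands  = Set("reset")  // e.g. a subclass could block :reset this way

// mirrors standardCommands.filter(cmd => !blockedCommands(cmd.name)) above
val available = standardCommands.filter(c => !blockedCommands(c.name))
// available now contains only the help command

The companion object SparkILoop also provides a way to drive the loop programmatically: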
object SparkILoop {
  /**
   * Creates an interpreter loop with default settings and feeds
   * the given code to it as input.
   */
  def run(code: String, sets: Settings = new Settings): String = {
    import java.io.{BufferedReader, StringReader, OutputStreamWriter}

    stringFromStream { ostream =>
      Console.withOut(ostream) {
        val input = new BufferedReader(new StringReader(code))
        val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
        val repl = new SparkILoop(input, output)

        if (sets.classpath.isDefault) {
          sets.classpath.value = sys.props("java.class.path")
        }
        // invoke process, inherited from the parent class ILoop
        repl process sets
      }
    }
  }

  def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString)
}
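This run method is how the loop can be driven programmatically, e.g. from tests. A minimal, hedged sketch of calling it (assumes spark-repl and its dependencies are on the classpath; the script content is arbitrary):

import org.apache.spark.repl.SparkILoop

// feed a small script to a fresh interpreter loop and capture the transcript
val transcript: String = SparkILoop.run(
  """
    |val xs = 1 to 10
    |xs.sum
  """.stripMargin)
println(transcript)

SparkILoop extends scala.tools.nsc.interpreter.ILoop, whose decompiled signatures look like this: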
class ILoop(in0: scala.Option[java.io.BufferedReader], protected val out: scala.tools.nsc.interpreter.JPrintWriter) extends scala.AnyRef with scala.tools.nsc.interpreter.LoopCommands {
  def this(in0: java.io.BufferedReader, out: scala.tools.nsc.interpreter.JPrintWriter) = { /* compiled code */ }
  def helpCommand(line: scala.Predef.String): ILoop.super[LoopCommands].Result = { /* compiled code */ }
  val historyCommand: ILoop.super[LoopCommands].LoopCommand {
    def defaultLines: scala.Int
  } = { /* compiled code */ }
  def searchHistory(_cmdline: scala.Predef.String): scala.Unit = { /* compiled code */ }
  // an enumeration of the possible outcomes of processing one line
  object LineResults extends scala.Enumeration {
    type LineResult = LineResults.Value
    val EOF: LineResults.Value = { /* compiled code */ }
    val ERR: LineResults.Value = { /* compiled code */ }
    val OK: LineResults.Value = { /* compiled code */ }
  }
  def processLine(line: scala.Predef.String): scala.Boolean = { /* compiled code */ }
  @scala.annotation.tailrec
  final def loop(): ILoop.this.LineResults.LineResult = { /* compiled code */ }
  def reset(): scala.Unit = { /* compiled code */ }
  def lineCommand(what: scala.Predef.String): ILoop.super[LoopCommands].Result = { /* compiled code */ }
  def command(line: scala.Predef.String): ILoop.super[LoopCommands].Result = { /* compiled code */ }
  def pasteCommand(arg: scala.Predef.String): ILoop.super[LoopCommands].Result = { /* compiled code */ }
  def process(settings: scala.tools.nsc.Settings): scala.Boolean = { /* compiled code */ }
  @scala.deprecated("Use `process` instead")
  def main(settings: scala.tools.nsc.Settings): scala.Unit = { /* compiled code */ }
}
object ILoop extends scala.AnyRef {
  implicit def loopToInterpreter(repl: scala.tools.nsc.interpreter.ILoop): scala.tools.nsc.interpreter.IMain = { /* compiled code */ }
  def runForTranscript(code: scala.Predef.String, settings: scala.tools.nsc.Settings, inSession: scala.Boolean = { /* compiled code */ }): scala.Predef.String = { /* compiled code */ }
  def run(code: scala.Predef.String, sets: scala.tools.nsc.Settings = { /* compiled code */ }): scala.Predef.String = { /* compiled code */ }
  def run(lines: scala.List[scala.Predef.String]): scala.Predef.String = { /* compiled code */ }
}
As we can see, ILoop defines the methods behind the shell commands; the decompiled view shows only /* compiled code */ stubs rather than the method bodies. The commands themselves look like this:
scala> :help
:cp <path>                 add a jar or directory to the classpath
:help [command]            print this summary or command-specific help
:history [num]             show the history (optional num is commands to show)
:h? <string>               search the history
:imports [name name ...]   show import history, identifying sources of names
:implicits [-v]            show the implicits in scope
:javap <path|class>        disassemble a file or class name
:load <path>               load and interpret a Scala file
:paste                     enter paste mode: all input up to ctrl-D compiled together
:quit                      exit the repl
:replay                    reset execution and replay all previous commands
:reset                     reset the repl to its initial state, forgetting all session entries
:sh <command line>         run a shell command (result is implicitly => List[String])
:silent                    disable/enable automatic printing of results
:fallback                  disable/enable advanced repl changes; these fix some issues but may
                           introduce others. This mode will be removed once these fixes stabilize.
:type [-v] <expr>          display the type of an expression without evaluating it
:warnings                  show the suppressed warnings from the most recent line which had any
Commands are encapsulated by LoopCommand in the scala.tools.nsc.interpreter.LoopCommands trait, which runs each command and returns its result. The code is as follows:
trait LoopCommands extends scala.AnyRef {
  abstract class LoopCommand(val name: scala.Predef.String, val help: scala.Predef.String) extends scala.AnyRef with scala.Function1[scala.Predef.String, LoopCommands.this.Result] {
    def usage: scala.Predef.String = { /* compiled code */ }
    def usageMsg: scala.Predef.String = { /* compiled code */ }
    def apply(line: scala.Predef.String): LoopCommands.this.Result
    // print the usage message and return a Result
    def showUsage(): LoopCommands.this.Result = { /* compiled code */ }
  }
  object LoopCommand extends scala.AnyRef {
    def nullary(name: scala.Predef.String, help: scala.Predef.String, f: scala.Function0[LoopCommands.this.Result]): LoopCommands.this.LoopCommand = { /* compiled code */ }
    // build a command that takes an argument line
    def cmd(name: scala.Predef.String, usage: scala.Predef.String, help: scala.Predef.String, f: scala.Function1[scala.Predef.String, LoopCommands.this.Result]): LoopCommands.this.LoopCommand = { /* compiled code */ }
  }
  // encapsulates the outcome of running a command
  case class Result(val keepRunning: scala.Boolean, val lineToRecord: scala.Option[scala.Predef.String]) extends scala.AnyRef with scala.Product with scala.Serializable {
  }
  object Result extends scala.AnyRef with scala.Serializable {
    val default: LoopCommands.this.Result = { /* compiled code */ }
    def recording(line: scala.Predef.String): LoopCommands.this.Result = { /* compiled code */ }
  }
}
The key pieces are the LoopCommand class with its companion object and the Result case class with its companion object: together they encapsulate the commands themselves and the results of running them.
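Because LoopCommands lives inside the compiler and its members are path-dependent, here is a self-contained stand-in sketch of the same encapsulation (all names are simplified re-creations, not the real API):

// stand-in for Result: should the loop keep running, and which line,
// if any, should be recorded in the session history?
case class Result(keepRunning: Boolean, lineToRecord: Option[String])

// stand-in for LoopCommand: a named, documented function String => Result
abstract class LoopCommand(val name: String, val help: String) extends (String => Result)

def cmd(name: String, help: String)(f: String => Result): LoopCommand =
  new LoopCommand(name, help) { def apply(line: String): Result = f(line) }

// e.g. a trivial :quit command: returning keepRunning = false ends the loop
val quit = cmd("quit", "exit the repl")(_ => Result(keepRunning = false, lineToRecord = None))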
1.2 repl.Main involves two main methods, doMain and createSparkSession. The code (abridged) is as follows:
private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = {
  interp = _interp
  val settings = new GenericRunnerSettings(scalaOptionError)
  settings.processArguments(interpArguments, true) // interpArguments: repl flags plus the user's args
  if (!hasErrors) interp.process(settings)         // hand control to the read-eval-print loop
}
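As a hedged, standalone illustration of what processArguments does (uses only the Scala compiler library; the -usejavacp flag is just an example):

import scala.tools.nsc.GenericRunnerSettings

// parse errors are reported through the callback, which is how
// Main's scalaOptionError sets the hasErrors flag
val settings = new GenericRunnerSettings(msg => Console.err.println(msg))
val (ok, unparsed) = settings.processArguments(List("-usejavacp"), true)
println(s"parsed ok: $ok, leftover args: $unparsed")

createSparkSession then builds the session that the repl binds to the name spark: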
def createSparkSession(): SparkSession = {
  val builder = SparkSession.builder.config(conf)
  // decide which catalog implementation backs the session, then create it
  if (conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
    if (SparkSession.hiveClassesArePresent) {
      sparkSession = builder.enableHiveSupport().getOrCreate()
      logInfo("Created Spark session with Hive support")
    } else {
      // fall back to the in-memory catalog when Hive classes are missing
      builder.config(CATALOG_IMPLEMENTATION.key, "in-memory")
      sparkSession = builder.getOrCreate()
      logInfo("Created Spark session")
    }
  } else {
    sparkSession = builder.getOrCreate()
    logInfo("Created Spark session")
  }
  // obtain the sparkContext from the sparkSession
  sparkContext = sparkSession.sparkContext
  // register the Ctrl+C handler (see Signaling below)
  Signaling.cancelOnInterrupt(sparkContext)
  sparkSession
}
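A hedged user-level sketch of the same decision (SparkSession.hiveClassesArePresent is private[spark], so the probe below approximates it with a plain classpath check; the app name and master are placeholders):

import scala.util.Try
import org.apache.spark.sql.SparkSession

// approximate hiveClassesArePresent with a classpath probe for a Hive class
val hiveAvailable = Try(Class.forName("org.apache.hadoop.hive.conf.HiveConf")).isSuccess

val spark = {
  val builder = SparkSession.builder.appName("demo").master("local[*]")
  (if (hiveAvailable) builder.enableHiveSupport() else builder).getOrCreate()
}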
scala.tools.nsc.GenericRunnerSettings parses these arguments through methods provided by its parent class scala.tools.nsc.settings.MutableSettings, such as processArguments and processArgumentString. The configuration entries themselves are defined in org.apache.spark.internal.config, for example:
package object config {
  // driver settings: driver class path, java options, library path,
  // user-library-path-first, and driver memory
  private[spark] val DRIVER_JAVA_OPTIONS =
    ConfigBuilder(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS).stringConf.createOptional
  private[spark] val DRIVER_MEMORY = ConfigBuilder("spark.driver.memory")
    .bytesConf(ByteUnit.MiB)
    .createWithDefaultString("1g")

  // executor settings: executor class path, java options, library path,
  // user-library-path-first, and executor memory
  private[spark] val EXECUTOR_JAVA_OPTIONS =
    ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS).stringConf.createOptional
  private[spark] val EXECUTOR_MEMORY = ConfigBuilder("spark.executor.memory")
    .bytesConf(ByteUnit.MiB)
    .createWithDefaultString("1g")

  // CPU setting: spark.task.cpus, default 1
  // dynamic-allocation settings: spark.dynamicAllocation.minExecutors/initialExecutors/maxExecutors
  // spark.shuffle.service.enabled, default false (dynamic allocation requires it to be true)
  // spark.executor.instances: the number of executor instances to request
  // SQL catalog implementation
  private[spark] val CATALOG_IMPLEMENTATION = ConfigBuilder("spark.sql.catalogImplementation")
    .internal()
    .stringConf
    .checkValues(Set("hive", "in-memory"))
    .createWithDefault("in-memory")
}
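These ConfigBuilder entries are private[spark], but the same keys can be read from a user-level SparkConf. A small sketch, with defaults mirroring the definitions above:

import org.apache.spark.SparkConf

val conf = new SparkConf()
val executorMemory = conf.get("spark.executor.memory", "1g")                  // EXECUTOR_MEMORY
val catalogImpl    = conf.get("spark.sql.catalogImplementation", "in-memory") // CATALOG_IMPLEMENTATION
println(s"executor memory = $executorMemory, catalog = $catalogImpl")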
org.apache.spark.launcher.SparkLauncher encapsulates the configuration an application needs and launches the application as a child process. For example:
public class SparkLauncher {
  public static final String SPARK_MASTER = "spark.master";
  public static final String DEPLOY_MODE = "spark.submit.deployMode";
  ...
  static final Map<String, String> launcherConfig = new HashMap<>(); // other launcher-level configuration
  ...
}
Its main methods:
setConfig
setJavaHome
setSparkHome
setPropertiesFile
setAppName       // set the application name
setMaster        // set the master
setDeployMode    // set the deploy mode
setMainClass     // set the main class to run
addSparkArg      // add a Spark argument
addAppArgs       // add arguments for the application
addJar/addFile
setVerbose
startApplication // the key method, see below
startApplication launches the application and returns a SparkAppHandle. The handle assumes the application instantiates a single SparkContext and reports the application's state throughout that context's lifetime, e.g. when the context stops. If the application runs as a child process whose state can no longer be detected, SparkAppHandle#kill() can be used to terminate it; a sketch of the call sequence follows below.
CHILD_PROCESS_LOGGER_NAME sets the logger name for the application's output; if it is not set, logger names will start with org.apache.spark.launcher.app.
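A hedged sketch of driving SparkLauncher from Scala (the paths, class name, and app name are placeholders, not taken from the original text):

import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

// launch the application as a child process and obtain a handle to it
val handle: SparkAppHandle = new SparkLauncher()
  .setSparkHome("/opt/spark")          // placeholder SPARK_HOME
  .setAppResource("/path/to/app.jar")  // placeholder application jar
  .setMainClass("com.example.MyApp")   // hypothetical main class
  .setMaster("local[*]")
  .setAppName("launcher-demo")
  .startApplication()

// poll the state the handle reports until the application reaches a final state
while (!handle.getState.isFinal) Thread.sleep(1000)
println(s"final state: ${handle.getState}")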
3. Signaling handles interrupts (SIGINT, i.e. Ctrl+C) mid-session: while jobs are active it cancels them instead of exiting, and a second Ctrl+C exits. The code is as follows:
private[repl] object Signaling extends Logging {
  def cancelOnInterrupt(ctx: SparkContext): Unit = SignalUtils.register("INT") {
    if (!ctx.statusTracker.getActiveJobIds().isEmpty) {
      logWarning("Cancelling all active jobs, this can take a while. " +
        "Press Ctrl+C again to exit now.")
      ctx.cancelAllJobs()
      true  // signal handled: stay in the repl
    } else {
      false // no active jobs: fall through to the default handler and exit
    }
  }
}
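SignalUtils itself is private[spark]. Purely as an illustration of the pattern (sun.misc is unsupported JVM API, so treat this as a sketch, not a recommendation):

import sun.misc.{Signal, SignalHandler}

// install a SIGINT handler; Signal.handle returns the previously installed one
val previous = Signal.handle(new Signal("INT"), new SignalHandler {
  def handle(sig: Signal): Unit =
    println("SIGINT caught: cancel active jobs here; a second Ctrl+C would exit")
})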