Edit the kernel spec

mkdir -p ~/.ipython/kernels/pyspark
vim ~/.ipython/kernels/pyspark/kernel.json

kernel.json contents

{
  "display_name": "pySpark",
  "language": "python",
  "argv": [
    "/var/local/anaconda2/bin/python",
    "-m",
    "IPython.kernel",
    "-f",
    "{connection_file}"
  ],
  "env": {
    "JAVA_HOME": "/opt/jdk8",
    "SPARK_HOME": "/usr/hdp/3.0.1.0-187/spark2",
    "PYTHONPATH": "/usr/hdp/3.0.1.0-187/spark2/python:/usr/hdp/3.0.1.0-187/spark2/python/lib/py4j-0.10.7-src.zip",
    "PYTHONSTARTUP": "/usr/hdp/3.0.1.0-187/spark2/python/pyspark/shell.py",
    "PYSPARK_SUBMIT_ARGS": "pyspark-shell"
  }
}
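
Before launching a notebook with this kernel, it can save debugging time to confirm that every path referenced in kernel.json actually exists on the machine. A minimal sketch, assuming the file lives at the path created above (only the json and os standard-library modules are used):

from __future__ import print_function  # keep py2 (anaconda2) and py3 compatible
import json
import os

# Path assumed from the mkdir/vim steps above.
kernel_json = os.path.expanduser("~/.ipython/kernels/pyspark/kernel.json")

with open(kernel_json) as f:
    spec = json.load(f)

# The interpreter in "argv" and every absolute path in "env" should exist.
paths = [spec["argv"][0]]
for value in spec.get("env", {}).values():
    paths.extend(p for p in value.split(":") if p.startswith("/"))

for p in paths:
    print("%s %s" % (p, "OK" if os.path.exists(p) else "MISSING"))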

Verification

import os
# The "env" block in kernel.json already sets these; uncomment to override here.
# os.environ['SPARK_HOME'] = '/usr/hdp/3.0.1.0-187/spark2/'
# os.environ['JAVA_HOME'] = '/opt/jdk8'

from pyspark import SparkContext, SparkConf

# Spark configuration: name the application and point it at the standalone master.
conf = SparkConf().setAppName("testspark").setMaster("spark://10.244.0.29:7077")
sc = SparkContext(conf=conf)

text_file = sc.textFile("hdfs:///root/test/spark/test.txt")

# Classic word count: split each line on spaces, emit (word, 1) pairs,
# then sum the counts per word.
counts = text_file.flatMap(lambda line: line.split(" ")) \
                  .map(lambda word: (word, 1)) \
                  .reduceByKey(lambda a, b: a + b)

# counts is a lazy RDD; collect() forces the computation and returns the pairs.
print(counts.collect())
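
print(counts.collect()) dumps every (word, count) pair at once, which is unwieldy for a large file. For a quicker readability check, the standard RDD method takeOrdered can pull just the most frequent words back to the driver (the top-10 cutoff below is arbitrary), and sc.stop() releases the cluster resources afterwards:

from __future__ import print_function  # keep py2 (anaconda2) and py3 compatible

# Ten most frequent words, highest count first.
for word, count in counts.takeOrdered(10, key=lambda pair: -pair[1]):
    print("%s\t%d" % (word, count))

# Shut down the SparkContext when finished.
sc.stop()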