# Format the HDFS filesystem first (hdfs namenode -format) before starting anything.
cd ../sbin/
# Enter the sbin directory; all of the server start scripts live here.
./hadoop-daemon.sh start namenode
./hadoop-daemon.sh start datanode
./hadoop-daemon.sh start secondarynamenode
# The secondary namenode is optional — it does not affect normal use — but it can be used to try out the HA feature.
# The lines below matter: Hadoop 2.0 removed jobtracker/tasktracker in favor of YARN, so running "start jobtracker" and the like will fail.
# The hadoop/hdfs/mapreduce roles were also split into separate scripts, so hadoop-daemon.sh can no longer start everything.
./yarn-daemon.sh start resourcemanager
# Equivalent to the old jobtracker: the process that allocates compute resources; it can live on the same host as the namenode.
./yarn-daemon.sh start nodemanager
# Equivalent to the old tasktracker: must be started on every datanode (slave) server.
#-*- encoding:UTF-8 -*-
#map.py
# Hadoop Streaming mapper: reads tab-separated log lines from stdin and emits
# "stat_date,version<TAB>0" key/value pairs for the reducer to count.
import sys

# When the input is LZO-indexed, an extra leading field shifts every index by one.
DEBUG = True
LZO_OFFSET = 0 if DEBUG else 1

# Placeholder value; the reducer counts line occurrences, not this value.
COUNT = '0'


def map_line(line, lzo=LZO_OFFSET):
    """Parse one input line; return the mapper output string, or None to skip.

    Expects at least 6+lzo tab-separated fields:
      field 2+lzo: "<date> <time>"  -> the date part becomes the first key component
      field 5+lzo: a double-quoted value -> its quoted content is the version

    Raises IndexError if field 5+lzo contains no double quote (caller catches it).
    """
    fields = line.rstrip('\n').split('\t')
    # NOTE(review): the original checked len(flags) != 5+lzo and then read
    # flags[5+lzo], which raised IndexError on every accepted line; 6+lzo is
    # the bound consistent with the indices actually used — TODO confirm the
    # intended field count against real input data.
    if len(fields) < 6 + lzo:
        return None
    stat_date = fields[2 + lzo].split(' ')[0]
    version = fields[5 + lzo].split('"')[1]
    return stat_date + ',' + version + '\t' + COUNT


def main():
    for line in sys.stdin:
        try:
            out = map_line(line)
            if out is not None:
                print(out)
        except Exception as e:
            # Report parse errors on stderr: writing them to stdout (as the
            # original did) would pollute the map output consumed by the reducer.
            sys.stderr.write('map error: %s\n' % e)


if __name__ == '__main__':
    main()
------------------------------------------------------------------
#-*- encoding:UTF-8 -*-
#reduce.py
# Hadoop Streaming reducer: counts how many times each key emitted by the
# mapper appears on stdin and prints "key,count" lines.
import sys


def reduce_lines(lines):
    """Count occurrences of each tab-separated key.

    Args:
        lines: iterable of "key<TAB>value" strings (trailing newline allowed).
    Returns:
        dict mapping key -> number of lines it appeared on. The value field is
        ignored; only line occurrences are counted (matches the mapper, which
        always emits '0' as the value). Malformed lines (not exactly two
        tab-separated fields) are skipped silently, as in the original.
    """
    counts = {}
    for line in lines:
        fields = line.rstrip('\n').split('\t')
        if len(fields) != 2:
            continue
        key = fields[0]
        # dict.get replaces the removed-in-Python-3 dict.has_key idiom.
        counts[key] = counts.get(key, 0) + 1
    return counts


def main():
    for key, total in reduce_lines(sys.stdin).items():
        print(key + ',' + '%s' % total)


if __name__ == '__main__':
    main()
./hadoop fs -copyFromLocal /root/asf /tmp/asf
or, alternatively, run the streaming job directly:
./yarn jar /opt/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.0.0-alpha.jar -mapper /opt/hadoop/mrs/map.py -reducer /opt/hadoop/mrs/red.py -input /tmp/asf -output /asf
12/06/01 23:26:40 WARN util.KerberosName: Kerberos krb5 configuration not found, setting default realm to empty
12/06/01 23:26:41 WARN conf.Configuration: session.id is deprecated. Instead, use dfs.metrics.session-id
12/06/01 23:26:41 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
12/06/01 23:26:41 INFO jvm.JvmMetrics: Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
12/06/01 23:26:41 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
12/06/01 23:26:42 WARN snappy.LoadSnappy: Snappy native library not loaded
12/06/01 23:26:42 INFO mapred.FileInputFormat: Total input paths to process : 1
12/06/01 23:26:42 INFO mapreduce.JobSubmitter: number of splits:1
12/06/01 23:26:42 WARN conf.Configuration: mapred.jar is deprecated. Instead, use mapreduce.job.jar
12/06/01 23:26:42 WARN conf.Configuration: mapred.create.symlink is deprecated. Instead, use mapreduce.job.cache.symlink.create
12/06/01 23:26:42 WARN conf.Configuration: mapred.job.name is deprecated. Instead, use mapreduce.job.name
12/06/01 23:26:42 WARN conf.Configuration: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir
12/06/01 23:26:42 WARN conf.Configuration: mapred.output.dir is deprecated. Instead, use mapreduce.output.fileoutputformat.outputdir
12/06/01 23:26:42 WARN conf.Configuration: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
12/06/01 23:26:42 WARN conf.Configuration: mapred.output.value.class is deprecated. Instead, use mapreduce.job.output.value.class
12/06/01 23:26:42 WARN conf.Configuration: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
12/06/01 23:26:42 WARN conf.Configuration: mapred.mapoutput.value.class is deprecated. Instead, use mapreduce.map.output.value.class
12/06/01 23:26:42 WARN conf.Configuration: mapred.mapoutput.key.class is deprecated. Instead, use mapreduce.map.output.key.class
12/06/01 23:26:42 WARN conf.Configuration: mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
12/06/01 23:26:42 WARN mapred.LocalDistributedCacheManager: LocalJobRunner does not support symlinking into current working dir.
12/06/01 23:26:42 INFO mapreduce.Job: The url to track the job: http://localhost:8080/
12/06/01 23:26:42 INFO mapreduce.Job: Running job: job_local_0001
12/06/01 23:26:42 INFO mapred.LocalJobRunner: OutputCommitter set in config null
12/06/01 23:26:42 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapred.FileOutputCommitter
12/06/01 23:26:42 INFO mapred.LocalJobRunner: Waiting for map tasks
12/06/01 23:26:42 INFO mapred.LocalJobRunner: Starting task: attempt_local_0001_m_000000_0
12/06/01 23:26:42 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin@52b5ef94
12/06/01 23:26:42 INFO mapred.MapTask: numReduceTasks: 1
12/06/01 23:26:42 INFO mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
12/06/01 23:26:42 INFO mapred.MapTask: mapreduce.task.io.sort.mb: 100
12/06/01 23:26:42 INFO mapred.MapTask: soft limit at 83886080
12/06/01 23:26:42 INFO mapred.MapTask: bufstart = 0; bufvoid = 104857600
12/06/01 23:26:42 INFO mapred.MapTask: kvstart = 26214396; length = 6553600
12/06/01 23:26:42 INFO streaming.PipeMapRed: PipeMapRed exec [/opt/hadoop/mrs/map.py]
12/06/01 23:26:42 WARN conf.Configuration: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
12/06/01 23:26:42 WARN conf.Configuration: user.name is deprecated. Instead, use mapreduce.job.user.name
12/06/01 23:26:42 WARN conf.Configuration: map.input.start is deprecated. Instead, use mapreduce.map.input.start
12/06/01 23:26:42 WARN conf.Configuration: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
12/06/01 23:26:42 WARN conf.Configuration: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
12/06/01 23:26:42 WARN conf.Configuration: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
12/06/01 23:26:42 WARN conf.Configuration: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
12/06/01 23:26:42 WARN conf.Configuration: map.input.length is deprecated. Instead, use mapreduce.map.input.length
12/06/01 23:26:42 WARN conf.Configuration: mapred.local.dir is deprecated. Instead, use mapreduce.cluster.local.dir
12/06/01 23:26:42 WARN conf.Configuration: mapred.work.output.dir is deprecated. Instead, use mapreduce.task.output.dir
12/06/01 23:26:42 WARN conf.Configuration: map.input.file is deprecated. Instead, use mapreduce.map.input.file
12/06/01 23:26:42 WARN conf.Configuration: mapred.job.id is deprecated. Instead, use mapreduce.job.id
12/06/01 23:26:43 INFO streaming.PipeMapRed: R/W/S=1/0/0 in:NA [rec/s] out:NA [rec/s]
12/06/01 23:26:43 INFO streaming.PipeMapRed: R/W/S=10/0/0 in:NA [rec/s] out:NA [rec/s]
12/06/01 23:26:43 INFO streaming.PipeMapRed: MRErrorThread done
12/06/01 23:26:43 INFO streaming.PipeMapRed: Records R/W=20/1
12/06/01 23:26:43 INFO streaming.PipeMapRed: mapRedFinished
12/06/01 23:26:43 INFO mapred.LocalJobRunner:
12/06/01 23:26:43 INFO mapred.MapTask: Starting flush of map output
12/06/01 23:26:43 INFO mapred.MapTask: Spilling map output
12/06/01 23:26:43 INFO mapred.MapTask: bufstart = 0; bufend = 560; bufvoid = 104857600
12/06/01 23:26:43 INFO mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214320(104857280); length = 77/6553600
12/06/01 23:26:43 INFO mapred.MapTask: Finished spill 0
12/06/01 23:26:43 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of committing
12/06/01 23:26:43 INFO mapred.LocalJobRunner: Records R/W=20/1
12/06/01 23:26:43 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
12/06/01 23:26:43 INFO mapred.LocalJobRunner: Finishing task: attempt_local_0001_m_000000_0
12/06/01 23:26:43 INFO mapred.LocalJobRunner: Map task executor complete.
12/06/01 23:26:43 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin@25d71236
12/06/01 23:26:43 INFO mapred.Merger: Merging 1 sorted segments
12/06/01 23:26:43 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 574 bytes
12/06/01 23:26:43 INFO mapred.LocalJobRunner:
12/06/01 23:26:43 INFO streaming.PipeMapRed: PipeMapRed exec [/opt/hadoop/mrs/red.py]
12/06/01 23:26:43 WARN conf.Configuration: mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
12/06/01 23:26:43 WARN conf.Configuration: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
12/06/01 23:26:43 INFO streaming.PipeMapRed: R/W/S=1/0/0 in:NA [rec/s] out:NA [rec/s]
12/06/01 23:26:43 INFO streaming.PipeMapRed: R/W/S=10/0/0 in:NA [rec/s] out:NA [rec/s]
12/06/01 23:26:43 INFO streaming.PipeMapRed: Records R/W=20/1
12/06/01 23:26:43 INFO streaming.PipeMapRed: MRErrorThread done
12/06/01 23:26:43 INFO streaming.PipeMapRed: mapRedFinished
12/06/01 23:26:43 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of committing
12/06/01 23:26:43 INFO mapred.LocalJobRunner:
12/06/01 23:26:43 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
12/06/01 23:26:43 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/asf/_temporary/0/task_local_0001_r_000000
12/06/01 23:26:43 INFO mapred.LocalJobRunner: Records R/W=20/1 > reduce
12/06/01 23:26:43 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
12/06/01 23:26:43 INFO mapreduce.Job: Job job_local_0001 running in uber mode : false
12/06/01 23:26:43 INFO mapreduce.Job: map 100% reduce 100%
12/06/01 23:26:43 INFO mapreduce.Job: Job job_local_0001 completed successfully
12/06/01 23:26:43 INFO mapreduce.Job: Counters: 32
File System Counters
FILE: Number of bytes read=205938
FILE: Number of bytes written=452840
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=252230
HDFS: Number of bytes written=59
HDFS: Number of read operations=13
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Map-Reduce Framework
Map input records=20
Map output records=20
Map output bytes=560
Map output materialized bytes=606
Input split bytes=81
Combine input records=0
Combine output records=0
Reduce input groups=2
Reduce shuffle bytes=0
Reduce input records=20
Reduce output records=2
Spilled Records=40
Shuffled Maps =0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=12
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=396361728
File Input Format Counters
Bytes Read=126115
File Output Format Counters
Bytes Written=59
12/06/01 23:26:43 INFO streaming.StreamJob: Output directory: /asf