JDK

Download

http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html

Extract

sudo tar -zxvf jdk-8u141-linux-x64.tar.gz -C /usr/local/

Set environment variables

sudo vim /etc/profile

# Add the following

export JAVA_HOME=/usr/local/jdk1.8.0_141

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin

# Apply immediately

source /etc/profile
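
To confirm the JDK is now on the PATH, a quick check (not part of the original steps):

java -version

It should report version 1.8.0_141.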

Add a user group

Create

sudo addgroup hadoop

sudo adduser --ingroup hadoop hadoop
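
A quick way to verify the new user and its primary group (not part of the original steps):

id hadoop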

Grant sudo privileges

sudo vim /etc/sudoers

# Add the following

hadoop  ALL=(ALL:ALL) ALL

Hadoop

Download

http://hadoop.apache.org/releases.html

Extract

sudo tar -zxvf hadoop-2.7.3.tar.gz -C /usr/local

Environment variables

sudo vim /etc/profile

# Add the following

export HADOOP_HOME=/usr/local/hadoop-2.7.3

export PATH=$PATH:$HADOOP_HOME/bin

export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native

export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

# Apply immediately

source /etc/profile

cd /usr/local/hadoop-2.7.3/etc/hadoop/

sudo gedit hadoop-env.sh

# in hadoop-env.sh, set JAVA_HOME explicitly

export JAVA_HOME=/usr/local/jdk1.8.0_141
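
With the environment variables loaded, a quick check that the hadoop command is usable (not part of the original steps):

hadoop version

It should report Hadoop 2.7.3.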

Test (standalone mode)

cd /usr/local/hadoop-2.7.3

sudo mkdir input

sudo cp README.txt input/

sudo bin/hadoop jar share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.7.3-sources.jar  org.apache.hadoop.examples.WordCount input output
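
This run uses the local (standalone) filesystem, so the result can be inspected directly; the output path is the one passed on the command line above:

cat output/*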

SSH

Install

sudo apt-get install openssh-server

Start

sudo /etc/init.d/ssh start

Check that it is running

ps -e | grep ssh

Generate a key pair

ssh-keygen -t rsa -P ""

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
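
If ssh localhost still asks for a password later on, the usual culprit is directory permissions; tightening them is a common fix (not part of the original steps):

chmod 700 ~/.ssh

chmod 600 ~/.ssh/authorized_keys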

Allow root login

sudo gedit /etc/ssh/sshd_config 

# Modify as follows

PasswordAuthentication yes 

PermitRootLogin yes 

RSAAuthentication yes 

PubkeyAuthentication yes 

AuthorizedKeysFile %h/.ssh/authorized_keys

# Apply

sudo service ssh restart

Log in

ssh localhost

Set up pseudo-distributed mode

Create the directories

cd /usr/local/hadoop-2.7.3

mkdir tmp

mkdir dfs

mkdir dfs/name

mkdir dfs/data

tmp holds temporary files, for example files produced while jobs run. The namenode and datanode directories are placed under tmp by default, and these two directories store the actual contents of HDFS.

If this is not configured, Hadoop creates tmp under the Ubuntu system /tmp directory, which is cleared on every reboot. The namenode and datanode contents are wiped along with it, so after every reboot the HDFS filesystem would have to be reformatted and all previous files and job history would be lost. With tmp, namenode, and datanode pointed at our own directories, no reformatting is needed after a reboot and the files stay on the Hadoop filesystem, just stored under the paths we chose.

Configure core-site.xml

cd /usr/local/hadoop-2.7.3/etc/hadoop

sudo vim core-site.xml

# Add the following

<configuration>

    <property>

        <name>fs.defaultFS</name>

        <value>hdfs://localhost:9009</value>

    </property>

    <property>

        <name>hadoop.tmp.dir</name>

        <value>/usr/local/hadoop-2.7.3/tmp</value>

    </property>

</configuration>

Configure hdfs-site.xml

<configuration>

    <property>

        <name>dfs.replication</name>

        <value>1</value>

    </property>

    <property>

        <name>dfs.namenode.name.dir</name>

        <value>/usr/local/hadoop-2.7.3/dfs/name</value>

    </property>

    <property>

        <name>dfs.datanode.data.dir</name>

        <value>/usr/local/hadoop-2.7.3/dfs/data</value>

    </property>

</configuration>

Format and start HDFS

# Before reformatting the namenode, clear the files under tmp and under dfs/name and dfs/data

rm -fr tmp/*

rm -fr dfs/name/*

rm -fr dfs/data/*

cd /usr/local

sudo chown -R qihao:qihao hadoop-2.7.3/

cd hadoop-2.7.3

bin/hdfs namenode -format

sbin/start-dfs.sh

# ResourceManager and NodeManager in the jps output below come from YARN, so start it as well

sbin/start-yarn.sh

jps

# At first port 9000 was occupied and the NameNode never came up; after changing it to 9009 everything worked

114371 NameNode

115619 NodeManager

115317 ResourceManager

114711 SecondaryNameNode

115658 Jps

114522 DataNode
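
If a daemon refuses to start because its port is taken (as happened here with 9000), it helps to see what is listening on that port before choosing another one; either of these standard commands will do (netstat requires the net-tools package):

sudo netstat -tlnp | grep 9000

sudo lsof -i :9000

Once everything is up, the NameNode web UI at http://localhost:50070 is another quick way to confirm that HDFS is running.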

Configure Eclipse

Download the plugin and put it into Eclipse's plugins folder

Configure the Hadoop installation directory

In Eclipse, set the Hadoop installation directory under Window->Preferences->Hadoop Map/Reduce

 

Configure the plugin

Open Window->Open Perspective->Map/Reduce; Hadoop program development is done in this perspective

Open Window->Show View->Other->MapReduce Tools->Map/Reduce Locations, then choose New Hadoop location… to create a new Hadoop connection

Location name and Host are both localhost. The Map/Reduce Master port must match the port configured in mapred-site.xml, here 9001. The DFS Master port must match the HDFS port configured in core-site.xml, here 9009. User name is the owner of the Hadoop installation, i.e. the Linux user that installed Hadoop, here qihao.
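
mapred-site.xml is not shown earlier in this walkthrough, so here is a minimal sketch of what the 9001 port above could correspond to; this is an assumption (the legacy JobTracker-style address the Eclipse plugin expects), not a file taken from the original setup:

<configuration>
    <property>
        <!-- assumed legacy JobTracker address matching the plugin's Map/Reduce Master field -->
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>

On Hadoop 2.x you would normally also (or instead) set mapreduce.framework.name to yarn; what matters for the plugin is only that the Map/Reduce Master port agrees with whatever you configure.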

Test

Create a new Map/Reduce project

Via src->New->Other you can create a Map class, a Reduce class, and a MapReduce Driver class in the project; the wizard generates skeletons for all three. Fill in the code, then right-click the MapReduce Driver class and choose Run on Hadoop to run the Hadoop application

Map code

package com.qihao;

import java.io.IOException;

import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

public class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);

    private Text word = new Text();

    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {

        StringTokenizer itr = new StringTokenizer(ivalue.toString());

        while (itr.hasMoreElements()) {

            word.set(itr.nextToken());

            context.write(word, one);

        }

    }

}

Reduce code

package com.qihao;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text _key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        // process values

        int sum = 0;  

        for (IntWritable val : values) {  

            sum += val.get();  

        }  

        context.write(_key, new IntWritable(sum));  

    }

}

Main program (driver)

package com.qihao;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

public class MyRun {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        if (otherArgs.length != 2) {

            System.err.println("Usage: Wordcount <in> <out>");

            System.exit(2);

        }

        Job job = Job.getInstance(conf, "JobName");

        job.setJarByClass(com.qihao.MyRun.class);

        // TODO: specify a mapper

        job.setMapperClass(MyMap.class);

        // TODO: specify a reducer

        job.setReducerClass(MyReduce.class);

        // TODO: specify output types

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(IntWritable.class);

        // TODO: specify input and output DIRECTORIES (not files)

        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        if (!job.waitForCompletion(true))

            return;

    }

}
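
Run on Hadoop expects the two path arguments checked in main; set them under Run Configurations -> Arguments. Because fs.defaultFS is hdfs://localhost:9009, relative paths resolve under /user/<username> in HDFS. A sketch of preparing the input and reading the result from the shell, with example paths (adjust the user name to your own):

cd /usr/local/hadoop-2.7.3

bin/hdfs dfs -mkdir -p /user/qihao/input

bin/hdfs dfs -put README.txt /user/qihao/input

# after the Eclipse job finishes with arguments "input output"

bin/hdfs dfs -cat /user/qihao/output/part-r-00000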

Project configuration