Table of Contents
- Overview
- 1. Adding pom.xml dependencies
- HDFS Java API Operations
- Creating the HDFS FileSystem environment
- Creating a directory
- Checking whether a file or directory exists
- Creating a file
- Uploading a local file to HDFS
- Downloading a file to the local machine
- Deleting a file or directory
- Listing file information
- Writing an HDFS file with streams
- Downloading a file with streams
- Complete code example
- Error: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
- Fix
- Error: Permission denied: user=TianTian, access=WRITE, inode="/root":root:supergroup:drwxr-xr-x
- Fix
Overview
This article describes how to operate Hadoop's HDFS file system with the Java API from a Windows environment, while the Hadoop cluster itself is installed on a remote Linux machine.
1. Adding pom.xml dependencies
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs-client</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.3.1</version>
    </dependency>
</dependencies>
HDFS Java API Operations
Creating the HDFS FileSystem environment
- Method 1: using configuration files
Copy the following configuration files from the Hadoop cluster into the project's resources directory, then connect to the remote HDFS file system with the code below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
Configuration conf = new Configuration();
conf.addResource("core-site.xml");
conf.addResource("hdfs-site.xml");
conf.addResource("mapred-site.xml");
conf.addResource("yarn-site.xml");
FileSystem fs = FileSystem.get(conf);
- Method 2: configuring in code
Configuration conf = new Configuration();
// Set the NameNode address of the Hadoop cluster; note that a standby node cannot be used
conf.set("fs.defaultFS", "hdfs://192.168.0.115:8020");
// Set the user that operates HDFS through a system property
System.setProperty("HADOOP_USER_NAME", "root");
// Make the HDFS client access DataNodes by hostname
conf.set("dfs.client.use.datanode.hostname", "true");
// Set the replication factor
conf.set("dfs.replication", "2");
// Set the block size
conf.set("dfs.blocksize", "64m");
// Create the FileSystem only after all configuration values have been set
FileSystem fs = FileSystem.get(conf);
TIPS: configuration precedence, from highest to lowest (a short sketch follows the list):
- 1. Values set in client code
- 2. User-defined configuration files on the classpath
- 3. The server-side default configuration
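As a quick illustration of this precedence, the sketch below (the property and values are assumptions chosen purely for demonstration) shows a value set in code winning over one loaded from a classpath file:
// Suppose hdfs-site.xml on the classpath sets dfs.replication to 3
Configuration conf = new Configuration();
conf.addResource("hdfs-site.xml");
// A value set in client code overrides both the classpath file and the server defaults
conf.set("dfs.replication", "2");
System.out.println(conf.get("dfs.replication"));   // prints 2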
Once the FileSystem environment above has been created, the fs object can be used for all of the operations below.
Creating a directory
// Create a directory
boolean flag = fs.mkdirs(new Path("/root/data"));
System.out.println(flag);
Checking whether a file or directory exists
boolean isExists = fs.exists(new Path("/root/data/student2.txt"));
System.out.println(isExists);
Creating a file
// create() returns an FSDataOutputStream; close it if nothing will be written right away
fs.create(new Path("/root/data/student2.txt")).close();
Uploading a local file to HDFS
// Upload a file
String srcFilePath = "D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java";
String remoteFilePath = "/root/data/";
fs.copyFromLocalFile(new Path(srcFilePath), new Path(remoteFilePath));
Downloading a file to the local machine
// Download a file or directory
fs.copyToLocalFile(new Path("/root/data"), new Path("D:\\javaworkspace\\BigData\\Hadoop\\MapReduceLearn\\src\\main\\resources\\"));
Deleting a file or directory
// Delete a path; if it is a directory, the second argument controls recursive deletion
fs.delete(new Path("/root/data/"), true);
Listing file information
// List files recursively
RemoteIterator<LocatedFileStatus> fileList = fs.listFiles(new Path("/root/data/"), true);
while (fileList.hasNext()) {
    System.out.println(fileList.next().toString());
}
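If individual fields are needed rather than the whole toString() dump, LocatedFileStatus exposes them directly. A minimal sketch over the same listing (the chosen fields are just examples):
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/root/data/"), true);
while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    // Print path, length in bytes, replication factor and permissions for each file
    System.out.println(status.getPath() + "\t" + status.getLen()
            + "\t" + status.getReplication() + "\t" + status.getPermission());
}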
Writing an HDFS file with streams
// Write to an HDFS file with a stream (append requires the file to already exist)
FSDataOutputStream fsDataOutputStream = fs.append(new Path("/root/data/student2.txt"));
FileInputStream fileinputStream = new FileInputStream("D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java");
byte[] buffer = new byte[1024 * 1024];
int read = 0;
while ((read = fileinputStream.read(buffer)) > 0) {
    fsDataOutputStream.write(buffer, 0, read);
}
// Alternatively, replace the manual loop above with IOUtils
// (open a fresh input stream first, because the one above has already been consumed):
// InputStream inputStream = new BufferedInputStream(new FileInputStream(srcFilePath)); // srcFilePath from the upload example
// IOUtils.copyBytes(inputStream, fsDataOutputStream, conf);
// Close the streams
fileinputStream.close();
fsDataOutputStream.close();
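Note that append() fails when the target file does not exist yet. For a brand-new file, open the output stream with create() instead; a minimal sketch (the file name and content here are just examples):
// The second argument allows overwriting an existing file
FSDataOutputStream out = fs.create(new Path("/root/data/student3.txt"), true);
out.write("hello hdfs".getBytes());
out.close();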
Downloading a file with streams
// Download a file with a stream
FileOutputStream fileOutputStream = new FileOutputStream("./test.txt");
FSDataInputStream fsDataInputStream = fs.open(new Path("/root/data/student2.txt"));
byte[] buffer2 = new byte[1024 * 1024];
int read2 = 0;
while ((read2 = fsDataInputStream.read(buffer2)) > 0) {
    fileOutputStream.write(buffer2, 0, read2);
}
// Alternatively, replace the manual loop above with IOUtils
// (reopen the HDFS file first, because the stream above has already been consumed):
// OutputStream outputStream = new BufferedOutputStream(fileOutputStream);
// IOUtils.copyBytes(fsDataInputStream, outputStream, conf);
fileOutputStream.close();
fsDataInputStream.close();
Complete code example
package com.hjt.yxh.hw;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;

import java.io.*;

public class HdfsApiTest {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("mapred-site.xml");
        conf.addResource("yarn-site.xml");
        // conf.set("fs.defaultFS", "hdfs://192.168.0.115:8020");
        System.setProperty("HADOOP_USER_NAME", "root");
        // conf.set("dfs.client.use.datanode.hostname", "true");
        FileSystem fs = FileSystem.get(conf);

        // Create a directory
        boolean flag = fs.mkdirs(new Path("/root/data"));
        System.out.println(flag);

        // Check whether the file exists
        boolean isExists = fs.exists(new Path("/root/data/student2.txt"));
        System.out.println(isExists);
        if (!isExists) {
            // create() returns a stream; close it since nothing is written here
            fs.create(new Path("/root/data/student2.txt")).close();
        }

        // Show file status
        FileStatus fileStatus = fs.getFileStatus(new Path("/root/data/student2.txt"));
        System.out.println(fileStatus.toString());

        // Download a file/directory
        fs.copyToLocalFile(new Path("/root/data/"), new Path("D:\\javaworkspace\\BigData\\Hadoop\\MapReduceLearn\\src\\main\\resources\\"));

        // Upload a file
        String srcFilePath = "D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java";
        String remoteFilePath = "/root/data/";
        fs.copyFromLocalFile(new Path(srcFilePath), new Path(remoteFilePath));

        // List files recursively
        RemoteIterator<LocatedFileStatus> fileList = fs.listFiles(new Path("/root/data/"), true);
        while (fileList.hasNext()) {
            System.out.println(fileList.next().toString());
        }

        // Write to an HDFS file with a stream (append requires the file to exist)
        FSDataOutputStream fsDataOutputStream = fs.append(new Path("/root/data/student2.txt"));
        FileInputStream fileinputStream = new FileInputStream(srcFilePath);
        byte[] buffer = new byte[1024 * 1024];
        int read = 0;
        while ((read = fileinputStream.read(buffer)) > 0) {
            fsDataOutputStream.write(buffer, 0, read);
        }
        // Alternatively, use IOUtils instead of the manual loop above
        // (it needs a fresh, unconsumed input stream):
        // InputStream inputStream = new BufferedInputStream(new FileInputStream(srcFilePath));
        // IOUtils.copyBytes(inputStream, fsDataOutputStream, conf);
        // Close the streams
        fileinputStream.close();
        fsDataOutputStream.close();

        // Download a file with a stream
        FileOutputStream fileOutputStream = new FileOutputStream("./test.txt");
        FSDataInputStream fsDataInputStream = fs.open(new Path("/root/data/student2.txt"));
        byte[] buffer2 = new byte[1024 * 1024];
        int read2 = 0;
        while ((read2 = fsDataInputStream.read(buffer2)) > 0) {
            fileOutputStream.write(buffer2, 0, read2);
        }
        // Alternatively, replace the manual loop above with IOUtils
        // (reopen the HDFS file first, since the stream has been consumed):
        // OutputStream outputStream = new BufferedOutputStream(fileOutputStream);
        // IOUtils.copyBytes(fsDataInputStream, outputStream, conf);
        fileOutputStream.close();
        fsDataInputStream.close();

        // Clean up: delete the test directory (recursively) before closing
        fs.delete(new Path("/root/data/"), true);
        fs.close();
    }
}
Error: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
Exception in thread "main" java.lang.RuntimeException: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:736)
at org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:271)
at org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:287)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:978)
at org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:660)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:700)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
at org.apache.hadoop.fs.ChecksumFileSystem.mkdirs(ChecksumFileSystem.java:788)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:513)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1064)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:417)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:390)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:340)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2547)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2516)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2492)
at com.hjt.yxh.hw.HdfsApiTest.main(HdfsApiTest.java:27)
Caused by: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
at org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:548)
at org.apache.hadoop.util.Shell.getHadoopHomeDir(Shell.java:569)
at org.apache.hadoop.util.Shell.getQualifiedBin(Shell.java:592)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:689)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:3741)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:3736)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3520)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:288)
at com.hjt.yxh.hw.HdfsApiTest.main(HdfsApiTest.java:14)
Caused by: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
at org.apache.hadoop.util.Shell.checkHadoopHomeInner(Shell.java:468)
at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:439)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:516)
... 7 more
Fix
The message says that neither HADOOP_HOME nor hadoop.home.dir is set. Both point to a local Hadoop installation configured through environment variables, which means a Hadoop runtime has to be set up on the local Windows machine.
If you only connect to a remote Hadoop cluster running on Linux, there is no need to download a full Hadoop distribution locally: download the winutils binaries, configure the HADOOP_HOME environment variable to point at them, and copy hadoop.dll into C:/windows/system32.
See the referenced blog post for the detailed winutils installation and configuration steps; it covers them thoroughly, so they are not repeated here.
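Alternatively, if the winutils files are already unpacked somewhere on disk, the hadoop.home.dir system property can be set in code before the first Hadoop class is loaded; the directory below is only an illustrative path and is assumed to contain bin\winutils.exe:
// Must run before any Hadoop class (e.g. FileSystem) is touched;
// the path is an example and should point at the folder holding bin\winutils.exe
System.setProperty("hadoop.home.dir", "D:\\hadoop\\hadoop-3.3.1");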
Error: Permission denied: user=TianTian, access=WRITE, inode="/root":root:supergroup:drwxr-xr-x
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=TianTian, access=WRITE, inode="/root":root:supergroup:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:504)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:336)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermissionWithContext(FSPermissionChecker.java:360)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:240)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1939)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1923)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1882)
at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3410)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1170)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:740)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
at java.base/java.security.AccessController.doPrivileged(AccessController.java:691)
at java.base/javax.security.auth.Subject.doAs(Subject.java:425)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1573)
at org.apache.hadoop.ipc.Client.call(Client.java:1519)
at org.apache.hadoop.ipc.Client.call(Client.java:1416)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:242)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:129)
at com.sun.proxy.$Proxy12.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.mkdirs(ClientNamenodeProtocolTranslatorPB.java:674)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:64)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:564)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy13.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:2499)
... 8 more
Fix
Set the HADOOP_USER_NAME system property to root, the owner of the target directory:
System.setProperty("HADOOP_USER_NAME","root");
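An equivalent way to act as root, without relying on the system property, is to pass the user name directly when obtaining the FileSystem; the URI below is simply the cluster address used earlier in this article, and note that this overload additionally throws InterruptedException:
import java.net.URI;

// The third argument is the user the client acts as on the cluster
FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.0.115:8020"), new Configuration(), "root");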