HDFS Web UI Operations
Web Console: 50070 (NameNode UI), 50090 (SecondaryNameNode UI)
HDFS Command-Line Operations
HDFS operation commands (hdfs dfs):
-mkdir          create a directory
-ls             list a directory
-ls -R          list a directory recursively
-put            upload a local file to HDFS
-moveFromLocal  like -put, but deletes the local source afterwards
-copyFromLocal  like -put, with the source restricted to a local file
-copyToLocal    like -get
-get            download a file from HDFS
-rm             delete a file or directory
-getmerge       concatenate all files in a directory and download them as one local file
-cp             copy within HDFS
-mv             move/rename within HDFS
-count          count the directories, files, and bytes under a path
-du             show the size of each file/directory under a path
-text, -cat     print a file's contents (-text also decodes compressed and sequence files)
balancer        rebalance blocks across DataNodes (run as hdfs balancer)
HDFS admin commands (hdfs dfsadmin):
-report         report basic filesystem information and statistics
-safemode       manage safe mode: get / enter / leave
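A quick example session tying a few of these commands together (the paths and file names here are only illustrative):

hdfs dfs -mkdir /demo
hdfs dfs -put data.txt /demo
hdfs dfs -ls -R /demo
hdfs dfs -count /demo
hdfs dfs -cat /demo/data.txt
hdfs dfsadmin -report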
Java API
Creating a directory
package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/*
 * Cause of the error:
 * Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
 * Permission denied: user=lenovo, access=WRITE, inode="/folder1":root:supergroup:drwxr-xr-x
 *
 * Current user: lenovo, which needs the w (write) permission
 * Permissions on the HDFS root: root:supergroup:drwxr-xr-x
 *
 * Four ways to make the program run:
 * 1. Set a system property carrying the user's identity
 * 2. Pass a -D JVM argument (e.g. -DHADOOP_USER_NAME=root)
 * 3. Change the directory's permissions: hdfs dfs -chmod 777 /folder2
 * 4. dfs.permissions ---> false, disabling HDFS permission checking
 */
public class TestMkDir {

    @Test
    public void test1() throws Exception {
        // Approach 1: set a system property carrying the user identity
        System.setProperty("HADOOP_USER_NAME", "root");
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder1"));
        // Close the client
        client.close();
    }

    @Test
    public void test2() throws Exception {
        // Approach 2: run with the JVM argument -DHADOOP_USER_NAME=root
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder2"));
        // Close the client
        client.close();
    }

    @Test
    public void test3() throws Exception {
        // Approach 3: run after hdfs dfs -chmod 777 /folder2,
        // then create a subdirectory inside the now-writable folder
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder2/folder3"));
        // Close the client
        client.close();
    }

    @Test
    public void test4() throws Exception {
        // Approach 4: run after permission checking has been disabled
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder4"));
        // Close the client
        client.close();
    }
}
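For approach 4, permission checking is disabled cluster-side in hdfs-site.xml, followed by a NameNode restart (Hadoop 2.x also accepts the newer key dfs.permissions.enabled):

<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>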
Uploading a file
package demo;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestUpload {

    @Test
    public void test1() throws Exception {
        // Build an input stream over the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get a client
        FileSystem client = FileSystem.get(conf);
        // Get an output stream to the target HDFS file
        OutputStream out = client.create(new Path("/tools/a.zip"));
        // Copy through a buffer
        byte[] buffer = new byte[1024];
        int len = 0;
        while ((len = in.read(buffer)) > 0) {
            // Write out whatever was read
            out.write(buffer, 0, len);
        }
        out.flush();
        out.close();
        in.close();
    }

    @Test
    public void test2() throws Exception {
        // Build an input stream over the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get a client
        FileSystem client = FileSystem.get(conf);
        // Get an output stream to the target HDFS file
        OutputStream out = client.create(new Path("/tools/b.zip"));
        // Use the utility class to simplify the copy; the final
        // argument tells IOUtils to close both streams when done
        IOUtils.copyBytes(in, out, 1024, true);
    }
}
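The manual stream handling can be skipped entirely: FileSystem has a built-in upload method, copyFromLocalFile. A minimal sketch reusing the paths above (the target name c.zip is just an example):

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestUploadBuiltin {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // copyFromLocalFile does the open/copy/close work internally
        client.copyFromLocalFile(new Path("d:\\dowload\\hadoop-2.4.1.zip"),
                new Path("/tools/c.zip"));
        client.close();
    }
}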
Downloading a file
package demo;

import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestDownload {

    @Test
    public void test1() throws Exception {
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get a client
        FileSystem client = FileSystem.get(conf);
        // Open an input stream <------ HDFS
        InputStream in = client.open(new Path("/tools/a.zip"));
        // Build an output stream ----> d:\temp\bb.zip
        OutputStream out = new FileOutputStream("d:\\temp\\bb.zip");
        // Use the utility class to simplify the copy; the final
        // argument tells IOUtils to close both streams when done
        IOUtils.copyBytes(in, out, 1024, true);

        // Equivalent manual copy:
        // byte[] buffer = new byte[1024];
        // int len = 0;
        // while ((len = in.read(buffer)) > 0) {
        //     out.write(buffer, 0, len);
        // }
        // out.flush();
        // out.close();
        // in.close();
    }
}
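Downloads also have a built-in counterpart, copyToLocalFile. A minimal sketch (the local target path is illustrative):

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestDownloadBuiltin {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // copyToLocalFile does the open/copy/close work internally
        client.copyToLocalFile(new Path("/tools/a.zip"),
                new Path("d:\\temp\\cc.zip"));
        client.close();
    }
}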
Inspecting file metadata
package demo;

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestMetaData {

    @Test
    public void testCheckFileInfo() throws Exception {
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get a client
        FileSystem client = FileSystem.get(conf);
        // Get the status of every entry under the directory
        FileStatus[] filesStatus = client.listStatus(new Path("/tools"));
        for (FileStatus f : filesStatus) {
            System.out.println(f.isDirectory() ? "directory" : "file");
            System.out.println(f.getPath().getName());
            System.out.println(f.getBlockSize());
            System.out.println("*************************");
        }
        client.close();
    }

    @Test
    public void testCheckFileBlock() throws Exception {
        // Point at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get a client
        FileSystem client = FileSystem.get(conf);
        // Get the file's status
        FileStatus fs = client.getFileStatus(new Path("/tools/a.zip"));
        // Get the locations of the file's data blocks
        BlockLocation[] location = client.getFileBlockLocations(fs, 0, fs.getLen());
        for (BlockLocation block : location) {
            // block.getHosts() returns a String[] because each block
            // is replicated on several DataNodes
            System.out.println(Arrays.toString(block.getHosts()) + "\t"
                    + Arrays.toString(block.getNames()));
        }
        client.close();
    }
}
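The two steps above can be collapsed into one call: FileSystem.listFiles returns LocatedFileStatus objects that already carry their block locations. A minimal sketch:

package demo;

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;

public class TestListFiles {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Recursively list files; each entry includes its block locations
        RemoteIterator<LocatedFileStatus> it = client.listFiles(new Path("/tools"), true);
        while (it.hasNext()) {
            LocatedFileStatus f = it.next();
            System.out.println(f.getPath().getName() + " -> "
                    + Arrays.toString(f.getBlockLocations()));
        }
        client.close();
    }
}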
Advanced HDFS Features
1. Trash
The HDFS trash is essentially a cut-and-paste (ctrl+x): a deleted file is moved into a hidden directory instead of being destroyed.
Enable it in core-site.xml (disabled by default; the value is the number of minutes deleted files are kept, so 1440 = 24 hours):
<property>
    <name>fs.trash.interval</name>
    <value>1440</value>
</property>
Without the trash, a delete is permanent. Log:
18/04/09 21:35:40 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted /tools ---> gone for good (contrast with the trash-enabled case below)
With the trash enabled, the delete becomes a move. Log:
Moved: 'hdfs://bigdata111:9000/tools/a.zip' to trash at: hdfs://bigdata111:9000/user/root/.Trash/Current
Inspect the trash:
hdfs dfs -lsr /user/root/.Trash/Current
Restore from the trash (a plain copy back):
hdfs dfs -cp /user/root/.Trash/Current/tools/a.zip /tools
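Programmatically, the same move-to-trash is available through org.apache.hadoop.fs.Trash. A minimal sketch, assuming fs.trash.interval is enabled and reusing the paths above:

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.junit.Test;

public class TestTrash {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Move the file into the current user's .Trash/Current;
        // returns false if the trash is disabled
        boolean moved = Trash.moveToAppropriateTrash(client, new Path("/tools/a.zip"), conf);
        System.out.println("moved to trash: " + moved);
        client.close();
    }
}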
2. Snapshots
Essentially: a read-only, point-in-time copy of a directory (conceptually a cp, although no data blocks are actually duplicated).
Admin commands:
[-allowSnapshot <snapshotDir>]
[-disallowSnapshot <snapshotDir>]
Operation commands:
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
Allow snapshots on a directory:
hdfs dfsadmin -allowSnapshot /students
Create a backup (snapshot):
hdfs dfs -createSnapshot /students backup_student_0411_01
Log: Created snapshot /students/.snapshot/backup_student_0411_01
hdfs dfs -put student03.txt /students
hdfs dfs -createSnapshot /students backup_student_0411_02
Restore from a snapshot (a plain copy back; this example uses a different directory, /input):
hdfs dfs -cp /input/.snapshot/backup_input_01/data.txt /input
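The snapshot commands have Java equivalents in HdfsAdmin and FileSystem; a minimal sketch mirroring the /students example above:

package demo;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.junit.Test;

public class TestSnapshot {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Equivalent of: hdfs dfsadmin -allowSnapshot /students
        HdfsAdmin admin = new HdfsAdmin(new URI("hdfs://192.168.157.111:9000"), conf);
        admin.allowSnapshot(new Path("/students"));
        // Equivalent of: hdfs dfs -createSnapshot /students backup_student_0411_01
        FileSystem client = FileSystem.get(conf);
        client.createSnapshot(new Path("/students"), "backup_student_0411_01");
        client.close();
    }
}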
3. Quotas
Name quota: limits the number of names (files and directories) under a directory.
[-setQuota <quota> <dirname>...<dirname>]
[-clrQuota <dirname>...<dirname>]
hdfs dfs -mkdir /folder1
hdfs dfsadmin -setQuota 3 /folder1
In practice the limit is N-1 files, because the directory itself counts as one name against its own quota (so -setQuota 3 above leaves room for two entries).
Space quota: limits the total storage consumed under a directory.
[-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...<dirname>]
[-clrSpaceQuota [-storageType <storagetype>] <dirname>...<dirname>]
Set a 1 MB space quota:
hdfs dfs -mkdir /folder2
hdfs dfsadmin -setSpaceQuota 1M /folder2
Error when writing a file into /folder2:
The DiskSpace quota of /folder2 is exceeded: quota = 1048576 B = 1 MB but diskspace consumed = 134217728 B = 128 MB
Note: the quota must be no smaller than one block size (128 MB by default) times the replication factor, because HDFS charges a whole block per replica against the quota as soon as a write starts.
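Quotas can also be set from Java through HdfsAdmin; a minimal sketch mirroring the commands above (note the space quota is given in raw bytes):

package demo;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.junit.Test;

public class TestQuota {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        HdfsAdmin admin = new HdfsAdmin(new URI("hdfs://192.168.157.111:9000"), conf);
        // Equivalent of: hdfs dfsadmin -setQuota 3 /folder1
        admin.setQuota(new Path("/folder1"), 3);
        // Equivalent of: hdfs dfsadmin -setSpaceQuota 1M /folder2
        admin.setSpaceQuota(new Path("/folder2"), 1024 * 1024);
    }
}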
4. Safe Mode
Safe mode is a read-only state of the NameNode: it collects block reports but does not replicate or delete blocks, and client writes are rejected.
hdfs dfsadmin -safemode get      check the safe mode status
hdfs dfsadmin -safemode enter    enter safe mode
hdfs dfsadmin -safemode leave    leave safe mode
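From Java, safe mode is driven through DistributedFileSystem.setSafeMode; a minimal sketch that just queries the current state:

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.junit.Test;

public class TestSafeMode {
    @Test
    public void test1() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // An hdfs:// URI yields a DistributedFileSystem instance
        DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
        // Equivalent of: hdfs dfsadmin -safemode get
        boolean inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET);
        System.out.println("in safe mode: " + inSafeMode);
        // SafeModeAction.SAFEMODE_ENTER / SAFEMODE_LEAVE work the same way
        dfs.close();
    }
}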