hdfs java api详解 hdfs的api

转载

mob64ca14031c97 2023-10-23 10:24:32

文章标签 hdfs java api详解 HDFS的API hadoop apache System 文章分类 Java 后端开发

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Introductory HDFS client: uploads one local file to HDFS and downloads another.
 *
 * <p>A file system is modelled as an object of type {@link FileSystem}. The static
 * factory {@code FileSystem.get(conf)} only hands back that object (it does not
 * transfer any data); which file system it represents is selected by the
 * {@code fs.defaultFS} setting, e.g. {@code file:///} for the local file system or
 * {@code hdfs://hadoop02:9000} for the cluster used here.
 */
public class HelloHDFS {
	/**
	 * Uploads {@code c:/ss.txt} to {@code /a} on HDFS, then downloads
	 * {@code /a/qqq.txt} to {@code c:/qqq.txt}.
	 *
	 * @param args unused
	 * @throws Exception if the file system cannot be obtained or a copy fails
	 */
	public static void main(String[] args) throws Exception {
		// Configuration behaves like a map of settings; constructing it loads
		// Hadoop's default resources found on the classpath.
		Configuration conf = new Configuration();
		// Store the cluster address under the key FileSystem.get() consults.
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");

		// The client otherwise identifies itself as the OS user (e.g. the Windows
		// "Administrator"), which the cluster rejects. The user can be changed either
		// with the VM argument -DHADOOP_USER_NAME=hadoop or, as here, in code.
		System.setProperty("HADOOP_USER_NAME", "hadoop");

		FileSystem fs = FileSystem.get(conf);
		try {
			// Author's note: without the fs.defaultFS override above this printed an
			// org.apache.hadoop.fs.LocalFileSystem instance, i.e. the local file system.
			System.out.println(fs);

			// Upload.
			fs.copyFromLocalFile(new Path("c:/ss.txt"), new Path("/a"));
			// Download; the target file name may be omitted when the name is not changed.
			fs.copyToLocalFile(new Path("/a/qqq.txt"), new Path("c:/qqq.txt"));
		} finally {
			// Release the connection even when one of the copies fails.
			fs.close();
		}
		// Author's notes on the .crc checksum file: block metadata records only the
		// valid data range, so data illegally appended to a block is ignored on
		// download, whereas a change in the middle of the data makes the CRC differ
		// from the stored value and HDFS treats the file as corrupt.
	}
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Same upload/download demo as the introductory example, additionally loading a
 * custom configuration resource.
 */
public class Hdfs_Default {

	/**
	 * Configures the client from code plus {@code myconfig/hdfs-site.xml}, then
	 * uploads {@code c:/ss.txt} and downloads {@code /a/qqq.txt}.
	 *
	 * <p>Author's conclusion: configuration files are only picked up automatically
	 * when they are named {@code *-default.xml} / {@code *-site.xml} and sit directly
	 * under {@code src}; anything else must be added with
	 * {@code conf.addResource(...)}. According to the author the effective order is:
	 * built-in defaults, then resources added via {@code addResource}, then values
	 * set with {@code conf.set(name, value)}.
	 *
	 * @param args unused
	 * @throws Exception if the file system cannot be obtained or a copy fails
	 */
	public static void main(String[] args) throws Exception {
		// Constructing the configuration loads the default resources on the classpath.
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		conf.addResource("myconfig/hdfs-site.xml");
		System.setProperty("HADOOP_USER_NAME", "hadoop");

		FileSystem fs = FileSystem.get(conf);
		try {
			System.out.println(fs);
			// Upload.
			fs.copyFromLocalFile(new Path("c:/ss.txt"), new Path("/a"));
			// Download; the target file name may be omitted when the name is not changed.
			fs.copyToLocalFile(new Path("/a/qqq.txt"), new Path("c:/qqq.txt"));
		} finally {
			// Close the file system even if a copy throws.
			fs.close();
		}
	}
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Lists the files directly under {@code /a} and prints their metadata and block
 * locations.
 */
public class Hdfs_API {
	/**
	 * Prints, for every file in {@code /a}: path, name, block size, group, length,
	 * owner, runtime class, permission, replication, number of blocks and the hosts
	 * holding each block.
	 *
	 * @param args unused
	 * @throws Exception if the listing or a metadata call fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		FileSystem fs = FileSystem.get(conf);
		try {
			// listFiles returns files only (never directories); the second argument
			// selects whether files in sub-directories are included as well.
			RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/a"), false);
			while (listFiles.hasNext()) {
				LocatedFileStatus file = listFiles.next();
				// Fully qualified path, e.g. hdfs://hadoop02:9000/a/gg.txt
				System.out.println(file.getPath());
				// File name only, e.g. gg.txt
				System.out.println(file.getPath().getName());
				System.out.println(file.getBlockSize());
				System.out.println(file.getGroup());
				System.out.println(file.getLen());
				System.out.println(file.getOwner());
				// Plain Object.getClass(): the runtime class of the status object.
				System.out.println(file.getClass());
				System.out.println(file.getPermission());
				System.out.println(file.getReplication());

				BlockLocation[] blockLocations = file.getBlockLocations();
				// One entry per block of the file.
				System.out.println(blockLocations.length);

				for (BlockLocation bl : blockLocations) {
					// Hosts storing a replica of this block, printed as
					// hadoop03-hadoop05-hadoop02. Joining in a loop works for any
					// replica count instead of assuming exactly three.
					String[] hosts = bl.getHosts();
					StringBuilder joined = new StringBuilder();
					for (int i = 0; i < hosts.length; i++) {
						if (i > 0) {
							joined.append("-");
						}
						joined.append(hosts[i]);
					}
					System.out.println(joined);
					// Network-topology paths of the replicas; called for demonstration
					// only, the result is intentionally not used.
					bl.getTopologyPaths();
				}
			}
		} finally {
			fs.close();
		}
	}
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Lists the entries directly under the HDFS root and labels each one as a file or
 * a directory.
 */
public class Hdfs_API2 {
	/**
	 * Prints {@code name:文件} for files and {@code name:文件夹} for everything else
	 * found directly under {@code /}.
	 *
	 * @param args unused
	 * @throws Exception if the listing fails
	 */
	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		FileSystem fs = FileSystem.get(conf);
		try {
			// Unlike listFiles, listStatus has no "recursive" flag; descend manually
			// (recursively) when nested entries are needed.
			FileStatus[] listStatus = fs.listStatus(new Path("/"));
			for (FileStatus fss : listStatus) {
				String name = fss.getPath().getName();
				if (fss.isFile()) {
					System.out.println(name+":文件");
				} else {
					System.out.println(name+":文件夹");
				}
			}
		} finally {
			fs.close();
		}
		// Aside: HDFS upload and download are stream based underneath. FileSystem
		// exposes open() (input stream) and create() (output stream), so instead of
		// copyFromLocalFile/copyToLocalFile one can open a local stream and an HDFS
		// stream and copy the bytes from one to the other.
	}
}

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Stream-based upload: a hand-written equivalent of {@code copyFromLocalFile}.
 */
public class UploadDateByStream {
	/**
	 * Copies the local file {@code c:/base.sh} to {@code /aa/new_base} on HDFS.
	 *
	 * @param args unused
	 * @throws Exception if a stream cannot be opened or the copy fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		FileSystem fs = FileSystem.get(conf);

		InputStream in = null;
		FSDataOutputStream out = null;
		try {
			// Local source stream.
			in = new FileInputStream(new File("c:/base.sh"));
			// HDFS target stream; the path also gives the uploaded file its new name.
			out = fs.create(new Path("/aa/new_base"));
			// org.apache.hadoop.io.IOUtils: 4096-byte buffer, close both streams when done.
			IOUtils.copyBytes(in, out, 4096, true);
		} finally {
			// Covers the case where fs.create() fails after the local stream was
			// opened; closing an already closed stream has no effect.
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
			fs.close();
		}
	}
}

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Stream-based download: a hand-written equivalent of {@code copyToLocalFile}.
 */
public class DownloadDateByStream {
	/**
	 * Copies {@code /aa/new_base} from HDFS to the local file {@code c:/ab.sh}.
	 *
	 * @param args unused
	 * @throws Exception if a stream cannot be opened or the copy fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		FileSystem fs = FileSystem.get(conf);

		FSDataInputStream in = null;
		OutputStream out = null;
		try {
			in = fs.open(new Path("/aa/new_base"));
			out = new FileOutputStream(new File("c:/ab.sh"));
			// 4096-byte buffer; close both streams when the copy is finished.
			IOUtils.copyBytes(in, out, 4096, true);
		} finally {
			// Covers the case where the local file cannot be opened after the HDFS
			// stream was; closing an already closed stream has no effect.
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
			fs.close();
		}
	}
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Removes empty files and empty directories below a given HDFS directory.
 */
public class DeleteEmptyDirAndFile {

	/** Shared file system handle; assigned by {@link #initFileSystem()}. */
	static FileSystem fs = null;

	/**
	 * Cleans up {@code /aa}. The calls that create or remove the test data are
	 * kept as comments so they can be enabled by hand.
	 *
	 * @param args unused
	 * @throws Exception if any HDFS operation fails
	 */
	public static void main(String[] args) throws Exception {

		initFileSystem();
		try {
			// Create the test data
//			makeTestData();

			// Delete the test data
//			deleteTestData();

			// Remove empty files and empty directories under the given directory
			deleteEmptyDirAndFile(new Path("/aa"));
		} finally {
			// Always release the connection, also when the clean-up fails.
			fs.close();
		}
	}

	/**
	 * Deletes every zero-length file and every empty directory below {@code path}.
	 * Directories that become empty because of those deletions are removed too,
	 * including {@code path} itself.
	 *
	 * <p>If {@code path} denotes a file, the file is deleted when it is empty and
	 * nothing else is touched (in particular its parent directory is kept).
	 *
	 * @param path directory (or file) to clean up
	 * @throws Exception if {@code path} does not exist or an HDFS call fails
	 */
	public static void deleteEmptyDirAndFile(Path path) throws Exception {
		FileStatus status = fs.getFileStatus(path);
		if (status.isDirectory()) {
			pruneDirectory(path);
		} else if (status.getLen() == 0) {
			fs.delete(path, false);
		}
	}

	/**
	 * Post-order clean-up of one directory: children first, then the directory
	 * itself if nothing is left in it.
	 */
	private static void pruneDirectory(Path dir) throws Exception {
		for (FileStatus child : fs.listStatus(dir)) {
			if (child.isDirectory()) {
				pruneDirectory(child.getPath());
			} else if (child.getLen() == 0) {
				fs.delete(child.getPath(), false);
			}
		}
		// Re-list after the children were processed: the directory may have been
		// empty from the start or may have become empty just now. The delete is
		// non-recursive on purpose, so content that appears between this check and
		// the delete is never removed silently.
		if (fs.listStatus(dir).length == 0) {
			fs.delete(dir, false);
		}
	}

	/**
	 * Initialises the shared {@link #fs} from {@code config/core-site.xml} and
	 * {@code config/hdfs-site.xml}, acting as user {@code hadoop}.
	 *
	 * @throws Exception if the file system cannot be obtained
	 */
	public static void initFileSystem() throws Exception{
		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.addResource("config/core-site.xml");
		conf.addResource("config/hdfs-site.xml");
		fs = FileSystem.get(conf);
	}

	/**
	 * Creates the directory tree under {@code /aa} used to try out the clean-up.
	 *
	 * @throws Exception if a directory cannot be created or an upload fails
	 */
	public static void makeTestData() throws Exception {

		String emptyFilePath = "D:\\bigdata\\1704mr_test\\empty.txt";
		String notEmptyFilePath = "D:\\bigdata\\1704mr_test\\notEmpty.txt";

		// Directory with both files, plus an empty sibling directory
		String path1 = "/aa/bb1/cc1/dd1/";
		fs.mkdirs(new Path(path1));
		fs.mkdirs(new Path("/aa/bb1/cc1/dd2/"));
		fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path1));
		fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path1));

		// Directory holding only the empty file
		String path2 = "/aa/bb1/cc2/dd2/";
		fs.mkdirs(new Path(path2));
		fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path2));

		// Directory holding only the non-empty file
		String path3 = "/aa/bb2/cc3/dd3";
		fs.mkdirs(new Path(path3));
		fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path3));

		// Empty directory
		String path4 = "/aa/bb2/cc4/dd4";
		fs.mkdirs(new Path(path4));

		System.out.println("测试数据创建成功");
	}

	/**
	 * Recursively deletes {@code /aa} and reports whether the delete succeeded.
	 *
	 * @throws Exception if the delete call fails
	 */
	public static void deleteTestData() throws Exception {
		boolean delete = fs.delete(new Path("/aa"), true);
		System.out.println(delete ? "删除数据成功" : "删除数据失败");
	}

}

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Utility for deleting {@code .sh} files from an HDFS directory tree.
 */
public class DeleteSHFile {

	/**
	 * Accepts a path when its name is longer than three characters and ends with
	 * {@code .sh}. Every path it is asked about is printed together with the length
	 * of its name.
	 */
	private static final PathFilter SH_FILTER = new PathFilter() {

		@Override
		public boolean accept(Path path) {
			String fileName = path.getName();
			int nameLength = path.getName().length();
			System.out.println(path.toString()  + "  \t "+ nameLength);
			return nameLength > 3 && fileName.endsWith(".sh");
		}
	};

	/**
	 * Prints the last three characters of a sample file name.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String sample = "lkfdjsjd.sh";
		System.out.println(sample.substring(sample.length() - 3));
	}

	/**
	 * Deletes all files ending in {@code .sh} under {@code path}, descending into
	 * sub-directories.
	 *
	 * @param fs file system to operate on
	 * @param path directory to start from
	 * @throws IOException if a listing or delete call fails
	 */
	public static void deleteSHFile(FileSystem fs, String path) throws IOException {

		// listLocatedStatus yields both the files and the directories of the given directory.
		RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(new Path(path));
		while (entries.hasNext()) {
			LocatedFileStatus entry = entries.next();
			String entryUri = entry.getPath().toUri().toString();

			if (entry.isDirectory()) {
				// Directory: keep walking down.
				deleteSHFile(fs, entryUri);
				continue;
			}

			// File: listing it through the filter gives either the file itself
			// (name matches) or nothing, so the array has length 1 or 0.
			FileStatus[] matches = fs.listStatus(new Path(entryUri), SH_FILTER);
			for (int i = 0; i < matches.length; i++) {
				System.out.println("&&&&&&&&&& \t"+matches[i].getPath());
				fs.delete(matches[i].getPath(), true);
			}
		}
	}
}

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * JUnit walkthrough of the basic {@link FileSystem} operations: mkdir, upload,
 * download, delete, rename and listing.
 */
public class HDFS_API_DEMO {

	/** File system used by the tests; (re)created before and closed after each test. */
	public static FileSystem fs;
	/** Configuration the current {@link #fs} was created from. */
	public static Configuration conf;

	/**
	 * Initialises {@link #fs} from an untouched configuration. According to the
	 * author this yields the local file system.
	 */
	public static void initLocalFS() throws Exception{
		conf = new Configuration();
		fs = FileSystem.get(conf);
	}

	/**
	 * Initialises {@link #fs} for the HDFS cluster, configured entirely in code.
	 */
	public static void initHDFS() throws Exception{
		conf = new Configuration();
		// With this key set FileSystem.get() returns a distributed file system
		// instance; without it the default (local) file system is returned.
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		// Identity the client presents to the cluster.
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		fs = FileSystem.get(conf);
	}

	/**
	 * Initialises {@link #fs} from the site files under {@code config/} plus two
	 * overrides set in code.
	 *
	 * <p>Author's note on precedence: values set in client code win over the user's
	 * configuration files on the classpath, which win over the built-in defaults.
	 */
	public static void initHDFSWithConf() throws Exception{
		// Assign the shared field (not a shadowing local) so that conf always
		// describes the configuration fs was built from, as in the other init methods.
		conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.addResource("config/core-site.xml");
		conf.addResource("config/hdfs-site.xml");
		conf.addResource("config/mapred-site.xml");
		conf.addResource("config/yarn-site.xml");

		// conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
		conf.set("dfs.replication", "2");
		conf.set("dfs.block.size", "64m");

		// Expected to be a DistributedFileSystem, assuming the site files define
		// fs.defaultFS -- TODO confirm against config/core-site.xml.
		fs = FileSystem.get(conf);
	}

	/** Selects which of the three initialisation variants the tests run against. */
	@Before
	public void init() throws Exception {
//		initLocalFS();
//		initHDFS();
		initHDFSWithConf();
	}

	/**
	 * Creates a nested directory and prints the result of the call.
	 */
	@Test
	public void testMkdir() throws Exception {
		System.out.println(fs.mkdirs(new Path("/ccc/bbb/aaa")));
	}

	/**
	 * Uploads a local file to the HDFS root.
	 */
	@Test
	public void testCopyFromLocal() throws Exception {
		// src: local path of the file to upload; dst: target path on HDFS.
		Path src = new Path("C:/software/hadoop-eclipse-plugin-2.6.4.jar");
		Path dst = new Path("/");
		fs.copyFromLocalFile(src, dst);
	}

	/**
	 * Downloads a file from HDFS to {@code c:/}.
	 */
	@Test
	public void testCopyToLocal() throws Exception {
		fs.copyToLocalFile(new Path("/wordcount/input/helloWorld.txt"), new Path("c:/"));
//		fs.copyToLocalFile(new Path("/wordcount/input/helloWorld.txt"), new Path("d:/"), true);
	}

	/**
	 * Deletes a file or directory.
	 */
	@Test
	public void testRemoveFileOrDir() throws Exception {
		// For a non-empty directory the second (recursive) argument must be true.
		fs.delete(new Path("/ccc/bbb"), true);
	}

	/**
	 * Renames a file or directory.
	 */
	@Test
	public void testRenameFileOrDir() throws Exception {
		// fs.rename(new Path("/ccc"), new Path("/vvv"));
		fs.rename(new Path("/hadoop-eclipse-plugin-2.6.4.jar"), new Path("/eclipsePlugin.jar"));
	}

	/**
	 * Prints every file under {@code /wordcount} (recursively) with its block layout.
	 */
	@Test
	public void testListFiles() throws Exception {
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/wordcount"), true);
		while (listFiles.hasNext()) {
			LocatedFileStatus fileStatus = listFiles.next();
			System.out.print(fileStatus.getPath() + "\t");
			System.out.print(fileStatus.getPath().getName() + "\t");
			System.out.print(fileStatus.getBlockSize() + "\t");
			System.out.print(fileStatus.getPermission() + "\t");
			System.out.print(fileStatus.getReplication() + "\t");
			System.out.println(fileStatus.getLen());

			BlockLocation[] blockLocations = fileStatus.getBlockLocations();
			for (BlockLocation bl : blockLocations) {
				System.out.println("Block Length:" + bl.getLength() + "   Block OffSet:" + bl.getOffset());
				String[] hosts = bl.getHosts();
				for (String str : hosts) {
					System.out.print(str + "\t");
				}
				System.out.println();
			}

			System.out.println("---------------------------------------");
		}
	}

	/**
	 * Prints the files and directories directly under {@code /wordcount}; the
	 * content of sub-directories is not included.
	 */
	@Test
	public void testListStatus() throws Exception {
		FileStatus[] listStatus = fs.listStatus(new Path("/wordcount"));
		for (FileStatus status : listStatus) {
			String flag = status.isDirectory() ? "Directory" : "File";
			System.out.println(flag + "\t" + status.getPath());
		}
	}

	/**
	 * Closes the file system after each test.
	 */
	@After
	public void close(){
		// init() may have failed before fs was assigned; without this guard the
		// resulting NullPointerException would hide the real failure.
		if (fs == null) {
			return;
		}
		try {
			fs.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Deletes every file whose name ends with {@link #FILETYPE} below a given HDFS path.
 */
public class HDFS_DELETE_CLASS {

	/** Name suffix (without a dot) of the files to delete. */
	public static final String FILETYPE = "txt";

	/**
	 * Runs the clean-up on {@code /ddd}.
	 *
	 * @param args unused
	 * @throws Exception if any HDFS operation fails
	 */
	public static void main(String[] args) throws Exception {

		new HDFS_DELETE_CLASS().rmrClassFile(new Path("/ddd"));
	}

	/**
	 * Connects to {@code hdfs://hadoop02:9000} as user {@code hadoop} and deletes
	 * the matching files at or below {@code path}.
	 *
	 * @param path file or directory to process
	 * @throws Exception if the connection or any HDFS operation fails
	 */
	public void rmrClassFile(Path path) throws Exception{

		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop02:9000"), conf, "hadoop");
		try {
			// Decide first whether path itself is a directory or a file.
			FileStatus fileStatus = fs.getFileStatus(path);
			if (fileStatus.isDirectory()) {
				checkAndDeleteDirectory(path, fs);
			} else {
				checkAndDeleteFile(path, fs);
			}
		} finally {
			// The file system was opened here, so it is released here as well.
			fs.close();
		}
	}

	/**
	 * Walks {@code path} recursively and hands every file to
	 * {@link #checkAndDeleteFile(Path, FileSystem)}.
	 *
	 * @param path directory to walk
	 * @param fs file system to operate on
	 * @throws Exception if a listing or delete call fails
	 */
	public static void checkAndDeleteDirectory(Path path, FileSystem fs) throws Exception{
		// Direct children (files and directories) of path.
		FileStatus[] listStatus = fs.listStatus(path);
		for (FileStatus fStatus : listStatus) {
			Path p = fStatus.getPath();
			if (fStatus.isFile()) {
				checkAndDeleteFile(p, fs);
			} else {
				checkAndDeleteDirectory(p, fs);
			}
		}
	}

	/**
	 * Prints the file name and deletes the file when the name ends with
	 * {@link #FILETYPE}; other files are left alone.
	 *
	 * @param path file to check
	 * @param fs file system to operate on
	 * @throws Exception if the delete call fails
	 */
	public static void checkAndDeleteFile(Path path, FileSystem fs) throws Exception{
		String name = path.getName();
		System.out.println(name);
		// A suffix test rather than indexOf(): a name that merely contains FILETYPE
		// must not be deleted. endsWith() is also safe for names shorter than
		// FILETYPE, where a manual substring would be given a negative start index.
		if (name.endsWith(FILETYPE)) {
			fs.delete(path, true);
		}
	}
}

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

/**
 * Stream-level HDFS access, one step below the packaged copy methods. According
 * to the author this is the kind of API that computation frameworks such as
 * MapReduce and Spark use when they read data from HDFS.
 */
public class HDFS_Stream_DEMO {

	FileSystem fs = null;

	/**
	 * Creates the file system from the site files under {@code config/}.
	 */
	@Before
	public void init() throws Exception {
		Configuration conf = new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		// conf.set("fs.defaultFS", "hdfs://hadoop01:9000");

		// Author's note: if core-site.xml and hdfs-site.xml sat directly under src
		// (i.e. on the classpath root) they would be loaded automatically; here they
		// live in a sub-folder and are added explicitly.
		conf.addResource("config/core-site.xml");
		conf.addResource("config/hdfs-site.xml");

		fs = FileSystem.get(conf);
	}

	/**
	 * Downloads {@code /wordcount/student.txt} to {@code c:/student.txt} using streams.
	 */
	@Test
	public void testDownLoadFileToLocal() throws IllegalArgumentException, IOException {
		FSDataInputStream in = null;
		FileOutputStream out = null;
		try {
			// Input stream on the HDFS file.
			in = fs.open(new Path("/wordcount/student.txt"));
			// Output stream on the local file.
			out = new FileOutputStream(new File("c:/student.txt"));
			// The four-argument overload closes both streams once the copy is done;
			// the three-argument (int) overload leaves them open.
			IOUtils.copyBytes(in, out, 4096, true);
		} finally {
			// No effect after a successful copy; releases whatever was opened otherwise.
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
		}
	}

	/**
	 * Uploads {@code d:/bigdata/student/student.txt} to
	 * {@code /wordcount/student.txt} using streams.
	 */
	@Test
	public void testUploadByStream() throws Exception {
		FileInputStream fsin = null;
		FSDataOutputStream fsout = null;
		try {
			// Open the local source first so that a missing source does not leave a
			// freshly created, empty file on HDFS.
			fsin = new FileInputStream("d:/bigdata/student/student.txt");
			fsout = fs.create(new Path("/wordcount/student.txt"));
			// Closing the HDFS output stream is what completes the file, so let
			// copyBytes close both streams.
			IOUtils.copyBytes(fsin, fsout, 4096, true);
		} finally {
			IOUtils.closeStream(fsout);
			IOUtils.closeStream(fsin);
		}
	}

	/**
	 * Reads 19 bytes starting at offset 22 of {@code /wordcount/student.txt} into
	 * a local file. Seeking plus a bounded copy is what lets higher-level
	 * frameworks process different parts of one file independently.
	 */
	@Test
	public void testRandomAccess() throws IllegalArgumentException, IOException {
		FSDataInputStream in = null;
		FileOutputStream out = null;
		try {
			in = fs.open(new Path("/wordcount/student.txt"));
			// Move the read position to the chosen start offset.
			in.seek(22);
			out = new FileOutputStream(new File("c:/iloveyou.line.2.txt"));
			// Copy exactly 19 bytes, then close both streams.
			IOUtils.copyBytes(in, out, 19L, true);
		} finally {
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
		}
	}

	/**
	 * Writes the first block of {@code /weblog/input/access.log.10} to
	 * {@code d:/block0} and prints the block's length and start offset.
	 */
	@Test
	public void testCat() throws IllegalArgumentException, IOException {
		Path file = new Path("/weblog/input/access.log.10");

		// File metadata and the locations of all of its blocks.
		FileStatus status = fs.getFileStatus(file);
		BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(status, 0L, status.getLen());
		if (fileBlockLocations.length == 0) {
			// An empty file has no blocks, hence nothing to copy.
			return;
		}

		// Length and start offset of the first block.
		long length = fileBlockLocations[0].getLength();
		long offset = fileBlockLocations[0].getOffset();

		System.out.println(length);
		System.out.println(offset);

		FSDataInputStream in = null;
		FileOutputStream os = null;
		try {
			in = fs.open(file);
			os = new FileOutputStream(new File("d:/block0"));

			byte[] b = new byte[4096];
			long position = offset;
			long remaining = length;
			while (remaining > 0) {
				// Never ask for more than what is left of the block.
				int wanted = (int) Math.min((long) b.length, remaining);
				int read = in.read(position, b, 0, wanted);
				if (read < 0) {
					break;
				}
				// Write only the bytes actually read, not the whole buffer.
				os.write(b, 0, read);
				position += read;
				remaining -= read;
			}
			os.flush();
			os.close();
		} finally {
			IOUtils.closeStream(os);
			IOUtils.closeStream(in);
		}
	}

}

import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;
  
/**
 * Reads the complete content of an HDFS file through {@link java.net.URL} and
 * copies it to standard output.
 */
public class HDFS_URLCat {

	private static Configuration conf = new Configuration();

	static {
		// Teach java.net.URL the hdfs:// scheme. The JVM accepts a stream handler
		// factory only once, which is why this lives in a static initialiser.
		conf.set("fs.defaultFS", "hdfs://hadoop06:9000");
		FsUrlStreamHandlerFactory hdfsHandlers = new FsUrlStreamHandlerFactory(conf);
		URL.setURLStreamHandlerFactory(hdfsHandlers);
	}

	/**
	 * Prints {@code hdfs://hadoop06:9000/wc/input/sort.txt} to standard output.
	 *
	 * @param args unused
	 * @throws Exception if the URL cannot be opened or the copy fails
	 */
	public static void main(String[] args) throws Exception {
		InputStream source = null;
		try {
			URL location = new URL("hdfs://hadoop06:9000/wc/input/sort.txt");
			source = location.openStream();
			// "false": leave System.out open; the input is closed in finally.
			IOUtils.copyBytes(source, System.out, 4096, false);
		} finally {
			IOUtils.closeStream(source);
		}
	}
}

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Collection of small HDFS experiments (configuration lookup, mkdir, upload,
 * download, delete, rename, listing) selected by editing {@link #main(String[])}.
 */
public class HDFSDemo {

	/** File system shared by all experiments; set by {@link #init()}. */
	static FileSystem fs = null;
	/** Configuration shared by all experiments; set by {@link #init()}. */
	static Configuration conf = null;

	/**
	 * Initialises the connection, runs the experiment that is currently enabled and
	 * closes the connection again.
	 *
	 * @param args unused
	 * @throws Exception if initialisation or the experiment fails
	 */
	public static void main(String[] args) throws Exception {
		init();
		try {
//			System.out.println(fs instanceof DistributedFileSystem);
//			System.out.println(fs instanceof RawLocalFileSystem);

//			testMkdirs();
//			testPut();
//			testGet();
			testConf();
//			testDelete();
//			testRename();
//			testList();
//			testCopyToLocalFile();
		} finally {
			// Close the connection even if the experiment throws.
			close();
		}
	}

	/**
	 * Prints a selection of configuration values.
	 *
	 * <p>Author's notes: cluster settings can reach the client in three ways --
	 * (1) in code via {@code conf.set()}, (2) from the {@code core-default.xml}
	 * shipped inside the jar, (3) from cluster configuration XML files bundled with
	 * the project. They are loaded in the order defaults, then configuration files,
	 * then code, so the precedence is 1 &gt; 3 &gt; 2 (whatever is applied last wins).
	 */
	public static void testConf(){

		System.out.println("文件系统URI："+conf.get("fs.defaultFS"));
		System.out.println("HDFS的副本块："+conf.get("dfs.replication"));
		System.out.println("默认的冗余块删除时间:"+conf.get("dfs.blockreport.intervalMsec"));
		System.out.println("元数据checkpoint的工作目录："+conf.get("dfs.namenode.checkpoint.dir"));
		System.out.println("元数据checkpoint的edits目录："+conf.get("dfs.namenode.checkpoint.edits.dir"));
		System.out.println("checkpoint最大重试次数："+conf.get("dfs.namenode.checkpoint.max-retries"));

		System.out.println("成功最小副本数："+conf.get("dfs.replication.min"));

		System.out.println("检查是否满足checkpoint条件的频率："+conf.get("dfs.namenode.checkpoint.check.period"));
		System.out.println("满足checkpoint条件的事务条数："+conf.get("dfs.namenode.checkpoint.txns"));
		System.out.println("满足checkpoint条件的时间间隔："+conf.get("dfs.namenode.checkpoint.period"));
		System.out.println("元数据大小满足chekcpoint条件："+conf.get("fs.checkpoint.period"));

//		System.out.println("元数据大小满足chekcpoint条件："+conf.get("dfs.namenode.checkpoint.size"));
//		System.out.println("元数据大小满足chekcpoint条件："+conf.get("dfs.namenode.checkpoint.check.period.size"));
//		System.out.println("元数据大小满足chekcpoint条件："+conf.get("fs.namenode.checkpoint.size"));
//		System.out.println("元数据大小满足chekcpoint条件："+conf.get("fs.checkpoint.size"));
//		File file = new File("myxmltext.xml");
//		String absolutePath = file.getAbsolutePath();
//		System.out.println(absolutePath);
//		conf.addResource(new Path("myxmltext.xml"));
//		conf.addResource(new Path("hdfs://hadoop02:9000/test.xml"));
//		conf.addResource("myxmltext.xml");
		System.out.println(conf.get("myname"));

//		NameNode.format(conf);
	}

	/**
	 * Creates {@link #conf} and {@link #fs}.
	 *
	 * @throws IOException if the file system cannot be obtained
	 */
	public static void init()  throws IOException{
		conf = new Configuration();
		// Variant 1: address of a plain (non-HA) cluster
//		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
//		conf.set("fs.defaultFS", "hdfs://myha01/");

		// Variant 2 (active): high-availability cluster, configured by adding all
		// of the cluster's configuration files to the project
		conf.addResource("conf/core-site.xml");
		conf.addResource("conf/hdfs-site.xml");
		conf.addResource("conf/mapred-site.xml");
		conf.addResource("conf/yarn-site.xml");

		// Variant 3: high-availability cluster, configured with conf.set()
//		conf.set("fs.defaultFS", "hdfs://myha01");
//		conf.set("dfs.nameservices", "myha01");
//		conf.set("dfs.ha.namenodes.myha01", "nn1,nn2");
//		conf.set("dfs.namenode.rpc-address.myha01.nn1", "hadoop02:9000");
//		conf.set("dfs.namenode.rpc-address.myha01.nn2", "hadoop03:9000");

		System.setProperty("HADOOP_USER_NAME", "hadoop");
		fs = FileSystem.get(conf);
	}

	/**
	 * Closes {@link #fs}.
	 *
	 * @throws IOException if closing fails
	 */
	public static void close()  throws IOException{
		fs.close();
	}

	/**
	 * Deletes a file or directory (recursively).
	 *
	 * @throws IOException if the delete call fails
	 */
	public static void testDelete()   throws IOException{
		fs.delete(new Path("/abcdddddd"), true);
	}

	/**
	 * Renames a file.
	 *
	 * @throws IOException if the rename call fails
	 */
	public static void testRename() throws IOException{
		fs.rename(new Path("/txt1.txt"), new Path("/huangbo.txt"));
	}

	/**
	 * Prints the direct children of {@code /}, then the block layout of every file
	 * below {@code /}.
	 *
	 * @throws IOException if a listing call fails
	 */
	public static void testList() throws IOException{

		// All entries directly under the path, files and directories alike.
		FileStatus[] listStatus = fs.listStatus(new Path("/"));
		for (FileStatus fss : listStatus) {
			if (fss.isDirectory()) {
				System.out.println(fss.getPath()+" --  文件夹");
			} else {
				System.out.println(fss.getPath()+" --  文件");
			}
			System.out.println(fss.getLen());
		}

		// Files only, including those in sub-directories.
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
		while (listFiles.hasNext()) {
			LocatedFileStatus next = listFiles.next();
			BlockLocation[] blockLocations = next.getBlockLocations();

			for (BlockLocation bl : blockLocations) {
				String[] hosts = bl.getHosts();
				System.out.println("起始偏移量："+bl.getOffset()+"\t长度："+bl.getLength());
				String[] topologyPaths = bl.getTopologyPaths();
				for (String s : topologyPaths) {
					System.out.print(s+"\t");
				}
				System.out.println();
				for (String s : hosts) {
					System.out.print(s + "\t");
				}
				System.out.println(next.getPath());
			}
		}
	}

	/**
	 * Creates a directory and prints the result of the call.
	 *
	 * @throws IOException if the mkdirs call fails
	 */
	public static void testMkdirs() throws IOException{
		boolean mkdirs = fs.mkdirs(new Path("/abcdccccc"));
		System.out.println(mkdirs);
	}

	/**
	 * Uploads a local file.
	 *
	 * @throws IOException if the upload fails
	 */
	public static void testPut()  throws IOException{
		fs.copyFromLocalFile(new Path("C:/wdata/student.txt"), new Path("/abcd.txt"));
	}

	/**
	 * Downloads a file; the commented lines are alternative calls the author tried.
	 *
	 * @throws IOException if the download fails
	 */
	public static void testGet()   throws IOException{
//		fs.copyToLocalFile(new Path("/abcd/student.txt"), new Path("C:/ss.txt"));

//		fs.copyToLocalFile(new Path("/txt.txt"), new Path("c:/1236666.txt"));
//		fs.moveToLocalFile(new Path("/txt.txt"), new Path("c:/12322226666.txt"));
//		fs.copyToLocalFile(false, new Path("/txt.txt"), new Path("c:/1666.txt"), false);

//		fs.copyToLocalFile(false, new Path("/txt.txt"), new Path("c:/166655.txt"), false);
		fs.copyToLocalFile(false, new Path("/txt.txt"), new Path("c:/166644.txt"), true);

		System.out.println("is done");
	}

}

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * 手动拷贝某个特定的数据块
 * 
 * hadoop的安装包：有三个数据块：
 * 最后的结果要求：只取出第二个数据块
 */

public class HDFSDemo2 {

	
	/**
	 *  1、使用流的方式上传
	    2、使用流的方式下载
		3、从随机地方开始读，读任意长度
		4、删除HDFS上整个文件夹
		5、删除某个路径下特定类型的文件，比如class类型文件，比如txt类型文件
		
	 */
	
	static FileSystem fs = null;
	static Configuration conf = null;
	
	@Before
	public void init() throws Exception{
		// Build the configuration in a local first, then publish it to the shared fields.
		Configuration localConf = new Configuration();

		// Cluster alternative (disabled): conf.set("fs.defaultFS", "hdsf://hadoop02:9000");
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		// Run the tests against the local file system.
		localConf.set("fs.defaultFS", "file:///");

		conf = localConf;
		fs = FileSystem.get(localConf);
	}
	
	@Test
	public void testConf(){
		// Configuration is iterable over its key/value entries; dump them all.
		for (Entry<String, String> setting : conf) {
			System.out.println(setting.getKey() + " == " + setting.getValue());
		}
	}
	
	@Test
	public void upload() throws Exception{
		// Author's note: underneath, this call is implemented with ordinary stream copying.
		Path localSource = new Path("c:/max_min_sum.sh");
		Path target = new Path("/ghgj");
		fs.copyFromLocalFile(localSource, target);
//		fs.moveFromLocalFile(new Path("c:/max_min_sum.sh"), new Path("/ghgj"));
//		fs.copyToLocalFile(new Path("c:/max_min_sum.sh"), new Path("/ghgj"));
	}
	
	@Test
	public void uploadFileByStream() throws Exception{
		// path路径：目标文件路径，也就是HDSF上的路径
		String outPath = "/ghgj/maxMinSum.sh";
		String inputPath = "c:/max_min_sum.sh";
		
		// 使用filesystem的api方法  create能够直接打开一个 hdfs上的输出流
		FSDataOutputStream outStream = fs.create(new Path(outPath));
		
		FileInputStream fis = new FileInputStream(new File(inputPath));
		
		IOUtils.copyBytes(fis, outStream, 4096);
	}
	
	@Test
	public void downloadFileByStream() throws Exception{
		
		String inputPath = "/ghgj/maxMinSum.sh";
		String outPath = "c:/max_min_sum11111.sh";
		
		FileOutputStream fos = new FileOutputStream(new File(outPath));
		FSDataInputStream openStream = fs.open(new Path(inputPath));
		
		IOUtils.copyBytes(openStream, fos, 4096);
	}
	
	/**
	 * 该测试方法表示从offset开始读，读入以后的所有内容
	 * @throws Exception
	 */
	@Test
	public void testRandomRead() throws Exception{
		String path = "/ghgj/number.txt";
		String outputPath = "c:/halfNumber111.txt";
		FSDataInputStream open = fs.open(new Path(path));
		
		// 如果这一行代码没有指定，那么就表示整个文件进行下载，如果指定了offset的话，表示从指定的offset进行拷贝
		long offset = 24L;
		open.seek(offset);
		
		FileOutputStream fos = new FileOutputStream(new File(outputPath));
		
		IOUtils.copyBytes(open, fos, 4096);
	}
	
	/**
	 * 从offset开始读，读length长度的内容
	 * @throws Exception
	 */
	@Test
	public void testRandomRead2() throws Exception{
		String path = "/ghgj/number.txt";
		String outputPath = "c:/halfNumber222.txt";
		FSDataInputStream open = fs.open(new Path(path));
		
		// 如果这一行代码没有指定，那么就表示整个文件进行下载，如果指定了offset的话，表示从指定的offset进行拷贝
		// 读入的数据长度是 length
		long offset = 24L;
		long length = 36L;
		open.seek(offset);
		FileOutputStream fos = new FileOutputStream(new File(outputPath));
		
		IOUtils.copyBytes(open, fos, length, true);
	}
	
	@Test
	public void delete() throws Exception{
		
		boolean delete = fs.delete(new Path("/aa/bb/"), true);
		System.out.println(delete);
	}
	
	@Test
	public void deleteSHFile() throws IOException {
		
		String path = "/ghgj/";
		
		// 文件过滤器
		/*PathFilter pathFilter = new PathFilter() {
			@Override
			public boolean accept(Path path) {
				return false;
			}
		};*/
		
		// 以下代码得到的结果是只返回指定路径下的所有文件和文件夹，不包括子文件夹下的内容
//		FileStatus[] listStatus = fs.listStatus(new Path(path), pathFilter);
		FileStatus[] listStatus = fs.listStatus(new Path(path));
		for(FileStatus fileStatus : listStatus){
			System.out.println(fileStatus.getPath());
		}
		
		System.out.println("--------------------------------");
		
		// 显示所有的文件
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path(path), true);
		while(listFiles.hasNext()){
			LocatedFileStatus next = listFiles.next();
			
			System.out.println(next.getPath());
		}
		
		System.out.println("--------------------------------");
		
		RemoteIterator<LocatedFileStatus> listLocatedStatus = fs.listLocatedStatus(new Path(path));
		while(listLocatedStatus.hasNext()){
			LocatedFileStatus next = listLocatedStatus.next();
			System.out.println(next.getPath());
		}
	}
	
	/**
	 * 测试删除指定文件夹下（包括所有子文件夹）所有的符合条件的文件
	 * @throws IOException
	 */
	@Test
	public void testDeleteSHFile() throws IOException{
		DeleteSHFile.deleteSHFile(fs, "/ghgj/");
	}
	
	@After
	public void close(){
		try {
			fs.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	@Test
	public void testFileSystemType(){
		
		System.out.println(fs.getClass());
	}
}

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * 用来测试HDFS的功能：上传，下载，查看，创建文件夹
 * 
 * 
 * FileSystem
 * 
 * Configuration：管理整个项目运行环境的配置信息
 * configuration默认会加载的配置文件有：
 * 		core-default.xml
 * 		hdfs-default.xml
 * 		mapred-default.xml
 * 		yarn-default.xml
 * 
 * Configuration conf = new Configuration();
 * 表示 conf中管理配置信息就是 以上四个默认配置文件当中的信息 
 * 
 * 但是，假如，，你的项目当中加入了
 * 		core-site.xml
 * 		hdfs-site.xml
 * 		mapred-site.xml
 * 		yarn-site.xml
 * 这四个配置文件当中的任何一个，并且放在classpath下，那么
 * Configuration conf = new Configuration();
 * 也会加载这些配置文件当中的配置信息
 * 
 * 如果以上  8 个配置文件都在classpath下， 那就涉及到 配置信息的  优先级：
 * site  >  default
 * 
 * 如果代码当中也进行手动设置配置信息，那么优先级就会变成：
 * 
 * 代码   >  site  >  default
 * 
 * conf对象管理就是项目当中能够加载到的所有配置信息
 * 
 * FileSystem ：  就表示是一个  文件系统的 抽象     一个 FileSystem实例对象，就代表一个 文件系统
 * FileSystem fs = FileSystem.get(uri, conf, "hadoop")
 * 
 * FileSystem的实例过程当中会从 conf 当中去读取   fs.defaultFS的值 ，来对应初始化FS对象
 * 
 * 如果conf中指定了  fs.defaultFS = "hdfs://hadoop02:9000", 那就表示， FileSystem初始化的就是  主节点为 hadoop02 的 HDFS 文件系统对象
 * 如果conf中没有指定 fs.defaultFS ， 那么 FileSystem初始化的就是  本地文件系统（代码在哪里运行，文件系统就是谁）
 * 
 * 	如果没有指定 fs.defaultFS ， 那么 fs.defaultFS = file:///
 * 
 * FileSystem 是一个 抽象类
 * 
 *  如果没有指定  fs.defaultFS，  那么  FileSystem的类型是   LocalFileSystem
 *  如果指定fs.defaultFS = "hdfs://hadoop02:9000"， 那么FileSystem 的类型就是  DistributedFileSystem
 *  
 *  
 *  HDFS API  :  两个：FileSystem   Configuration
 *  ZooKeeper ： 两个  : ZooKeeper，  Watcher
 *  HBase ： 十四五个 API  
 *  		新 API ，  老 API ：  方法
 *  
 * 所有关于java的API 都很好理解： 都是抽象  
 * 
 * 	HBase ： 表：HTable
 * 			列：HColumn -- Qualify
 * 			列簇：ColumnFamily
 * 
 * 	hbase的student表： HTable table =   new  HTable("student");
 *  table.addColumn(Column c)
 *
 * table.insert()
 * table.delete()
 * table.update()
 * 
 * Admin  admin  = new Admin("hbase://hadoop04:8888");
 * admin.createTable(table)
 * 
 * 函数式编程：
 * sc.textFile("/ghgj/student.txt").flatMap(sr1).map(sr2).reduceByKey(sr33).collect();
 * 
 * 最后学 SPark的时候。算子 总共 有 将近  100个，   常用 20个左右
 *
 *sparkCore ： 任务调度过程，算子
 *
 *Spark SQL
 *spark Streaming
 *
 *Scala就是封装java编程语言
 *
 * Java：  面向对象  --- 一切皆对象  --- 设计模式
 * web : ORM思想
 * 
 * 在使用的过程当中，一定要注意：把 site 配置文件放在 src下，就可以不需要手动加载；但是如果不在src下，那么就需要手动加载
 * 
 * conf.addResource("config/test.xml")
 */
public class HDFSDemo1704 {

	/** Configuration shared by the JUnit tests; rebuilt by {@link #init()} before each test. */
	static Configuration conf = null;

	/** File system shared by the JUnit tests; obtained by {@link #init()} before each test. */
	static FileSystem fs = null;

	/**
	 * Stand-alone entry point: uploads the Hadoop source archive to HDFS.
	 *
	 * @param args unused
	 * @throws Exception if the upload fails
	 */
	public static void main(String[] args) throws Exception {

		// testMkdirs();

		put("D:\\1704\\hadoop-day1\\soft\\hadoop-2.6.5-src.tar.gz");
	}

	/**
	 * Creates a nested directory on HDFS and prints whether it succeeded.
	 *
	 * <p>The target file system is chosen by the explicit URI rather than by
	 * {@code fs.defaultFS}. The file system is closed even when {@code mkdirs} throws.
	 *
	 * @throws Exception if the file system cannot be obtained or the call fails
	 */
	public static void testMkdirs() throws Exception {
		Configuration mkdirConf = new Configuration();

		try (FileSystem hdfs = FileSystem.get(new URI("hdfs://hadoop02:9000"), mkdirConf, "hadoop")) {
			boolean created = hdfs.mkdirs(new Path("/ghgj/aa/bb/cc/dd"));
			System.out.println(created ? "创建文件夹成功" : "创建文件夹失败");
		}
	}

	/**
	 * Uploads a local file to {@code /ghgj/hadoop-src.tar.gz} on HDFS.
	 *
	 * <p>The file system is closed even when the copy throws.
	 *
	 * @param path local path of the file to upload
	 * @throws IOException if the file system cannot be obtained or the copy fails
	 */
	public static void put(String path) throws IOException {
		Configuration putConf = new Configuration();
		putConf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		// Client-side override of the replication setting used for this upload.
		putConf.set("dfs.replication", "2");

		System.setProperty("HADOOP_USER_NAME", "hadoop");

		try (FileSystem hdfs = FileSystem.get(putConf)) {
			hdfs.copyFromLocalFile(new Path(path), new Path("/ghgj/hadoop-src.tar.gz"));
		}
	}

	/**
	 * Initializes the shared {@link #conf} and {@link #fs} before each test, pointing
	 * them at the HDFS cluster as user "hadoop".
	 *
	 * @throws Exception if the file system cannot be obtained
	 */
	@Before
	public void init() throws Exception {
		conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		fs = FileSystem.get(conf);
	}

	/**
	 * Closes the shared file system after each test.
	 *
	 * <p>Skips the close when {@link #init()} failed before assigning {@link #fs}, so the
	 * original failure is not masked by a NullPointerException. A failing close is only
	 * reported, never rethrown, to keep tear-down best-effort.
	 */
	@After
	public void close() {
		if (fs == null) {
			return;
		}
		try {
			fs.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Loads an extra configuration resource by hand and prints two values from the result.
	 *
	 * <p>Per the original author's notes, the four *-site.xml files (core, hdfs, yarn,
	 * mapred) are loaded automatically only when they sit under src (i.e. on the
	 * classpath); a resource with any other name has to be added explicitly, as here.
	 */
	@Test
	public void testConf() {
		conf.addResource("myconf/test.xml");

		System.out.println(conf.get("system2345"));
		System.out.println(conf.get("fs.defaultFS"));
	}

	/**
	 * Downloads the uploaded archive to the local disk using {@code moveToLocalFile}.
	 *
	 * <p>NOTE(review): a move is expected to remove the source from HDFS;
	 * {@code copyToLocalFile} with the same arguments is the variant that keeps it.
	 *
	 * @throws Exception if the transfer fails
	 */
	@Test
	public void get() throws Exception {
		Path source = new Path("/ghgj/hadoop-src.tar.gz");

		fs.moveToLocalFile(source, new Path("c:/myhadoop-src.gz"));
	}

	/**
	 * Recursively lists every file under {@code /ghgj/} with its length and, for each of
	 * its blocks, the replica names and hosts.
	 *
	 * @throws Exception if the listing fails
	 */
	@Test
	public void listFiles() throws Exception {
		RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/ghgj/"), true);

		while (files.hasNext()) {
			LocatedFileStatus file = files.next();
			System.out.println(file.getPath().getName() + " ------- " + file.getLen());

			// One BlockLocation per data block of the file: a file with four blocks
			// yields an array of length 4. Each entry describes where that block's
			// replicas are stored.
			for (BlockLocation block : file.getBlockLocations()) {
				String[] names = block.getNames();
				for (String name : names) {
					System.out.print(name + "\t");
				}

				// The first name is printed once more, as in the original output.
				// Guarded because the array is empty when no replica is reported.
				if (names.length > 0) {
					System.out.print(names[0] + "\t");
				}

				// Hosts storing each replica of this block.
				for (String host : block.getHosts()) {
					System.out.print(host + "\t");
				}
				System.out.println();
			}
		}
	}

	/**
	 * Lists the direct children of {@code /ghgj/} with their replication factor, full
	 * path, and whether each entry is a directory or a file.
	 *
	 * @throws Exception if the listing fails
	 */
	@Test
	public void listStatus() throws Exception {
		FileStatus[] children = fs.listStatus(new Path("/ghgj/"));

		for (FileStatus child : children) {
			System.out.print(child.getReplication() + "\t" + child.getPath() + "\t");
			System.out.println(child.isDirectory() ? "文件夹" : "文件");
		}
	}
}