Operating HDFS from a Client via the API

I. Preliminary Preparation

  1. Prepare the jar packages
    Unpack the Hadoop distribution, open its share directory, collect the jar packages there into one folder, and import them into Eclipse.
  2. Configure environment variables
    Set the HADOOP_HOME environment variable.

II. Operating HDFS via the API

Every HDFS operation follows three main steps (a minimal skeleton is sketched below):
1. Obtain the file system
2. Operate on the files
3. Close resources
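
Since FileSystem implements java.io.Closeable, step 3 can also be handled with try-with-resources. A minimal sketch of the three-step pattern; HdfsSkeleton is just a name chosen for this sketch, while the hadoop102 address and atguigu user are the same ones used throughout this article:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsSkeleton {
	public static void main(String[] args) throws Exception {
		// 1. Obtain the file system; try-with-resources performs step 3 automatically
		Configuration configuration = new Configuration();
		try (FileSystem fileSystem = FileSystem.get(
				new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			// 2. Operate on files, e.g. check that the home directory exists
			System.out.println(fileSystem.exists(new Path("/user/atguigu")));
		}
	}
}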

1. File Upload

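The snippets below are assumed to live in a single class with the following imports (the later test methods also use JUnit's @Test):

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;
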
public static void main(String[] args) throws Exception {
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		// Point the client at the cluster
		configuration.set("fs.defaultFS", "hdfs://hadoop102:8020");
		//FileSystem fileSystem = FileSystem.get(configuration);

		// Or pass the cluster URI and user name directly, which makes the set() call above unnecessary
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Upload a local file to the file system
		fileSystem.copyFromLocalFile(new Path("f:/xiyou1.txt"), new Path("/user/atguigu/xiyou1.txt"));

		// 3. Close resources
		fileSystem.close();
		System.out.println("over");
	}

2. File Download

	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException{
		Configuration configuration = new Configuration();
		// 1. Obtain the file system
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// (optional) print the file system object
		System.out.println(fileSystem.toString());
		// 2. Download the file (delSrc=false, useRawLocalFileSystem=true)
		fileSystem.copyToLocalFile(false, new Path("/user/atguigu/xiyou.txt"), new Path("F:/xiyou.txt"), true);
		// 3. Close resources
		fileSystem.close();
	}

Note: when performing the download in step 2, the two-argument fs.copyToLocalFile(Path src, Path dst) may throw a NullPointerException. This is probably caused by the Windows environment setup (typically HADOOP_HOME and the winutils native binaries are missing, which the checksummed local file system relies on). In that case, use the four-argument copyToLocalFile and pass true for the final useRawLocalFileSystem parameter, as done above.

3. Create Directories

public void mkdirAtHDFS() throws IOException, InterruptedException, URISyntaxException{
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Create the directories; missing parents are created automatically

		fileSystem.mkdirs(new Path("/user/atguigu/tiantang"));

		fileSystem.mkdirs(new Path("/user/atguigu/sunhouzi/houzaizi"));
		// 3. Close resources
		fileSystem.close();
	}
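
mkdirs behaves like mkdir -p: the nested call above succeeds even though /user/atguigu/sunhouzi does not exist beforehand. A quick sketch to confirm, assuming the same fileSystem handle is still open:

		// mkdirs returns true on success; exists() confirms the parent was created too
		System.out.println(fileSystem.exists(new Path("/user/atguigu/sunhouzi")));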

4. Delete Files and Directories

public void deleteAtHDFS() throws IOException, InterruptedException, URISyntaxException{
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
		// 2. Delete the file; the second argument enables recursive deletion
		fileSystem.delete(new Path("/user/atguigu/xiyou2.txt"), true);
		// 3. Close resources
		fileSystem.close();
	}
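
The same call removes directories; the recursive flag must be true when the target is a non-empty directory, and delete returns false when the path does not exist. A short sketch, reusing the tiantang directory created in section 3 and assuming the same fileSystem handle:

		// Recursively delete the directory created earlier; pass true whenever
		// the target may be a non-empty directory
		boolean deleted = fileSystem.delete(new Path("/user/atguigu/tiantang"), true);
		System.out.println(deleted);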
	

5. Rename a File

public void renameAtHDFS() throws IOException, InterruptedException, URISyntaxException{
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
		// 2. Rename the file
		fileSystem.rename(new Path("/user/atguigu/xiyou.txt"), new Path("/user/atguigu/honglou.txt"));
		// 3. Close resources
		fileSystem.close();
	}
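
rename reports failure through its boolean return value rather than by throwing, so it is worth checking; a minimal sketch with the same paths and fileSystem handle:

		boolean renamed = fileSystem.rename(new Path("/user/atguigu/xiyou.txt"),
				new Path("/user/atguigu/honglou.txt"));
		if (!renamed) {
			// Typically the source does not exist or the destination is already taken
			System.out.println("rename failed");
		}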
	

6. View File Details

public void readFileAtHDFS() throws IOException, InterruptedException, URISyntaxException{
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
		// 2. View file details; listFiles(path, true) recursively returns files only, no directories
		RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/user/atguigu"), true);

		while(listFiles.hasNext()){
			LocatedFileStatus next = listFiles.next();
			// File name
			System.out.println(next.getPath().getName());
			// Block size
			System.out.println(next.getBlockSize());
			// File length
			System.out.println(next.getLen());
			// Permissions
			System.out.println(next.getPermission());

			// Details of each block
			BlockLocation[] blockLocations = next.getBlockLocations();

			for(BlockLocation bl : blockLocations){
				// Offset of this block within the file
				System.out.println(bl.getOffset());

				// Hosts holding a replica of this block
				String[] hosts = bl.getHosts();

				for(String host : hosts) {
					System.out.println(host);
				}
			}
			System.out.println("-------------------");
		}
		// 3. Close resources
		fileSystem.close();
	}
	

7. View Folders

public void readFolderAtHDFS() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system
		Configuration configuration = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. List the directory; unlike listFiles above, listStatus is not recursive
		// and returns directories as well as files
		FileStatus[] listStatus = fileSystem.listStatus(new Path("/user/atguigu/"));
		// Distinguish files from directories
		for(FileStatus status : listStatus) {
			if(status.isFile()) {
				System.out.println("f--"+status.getPath().getName());
			} else {
				System.out.println("d--"+status.getPath().getName());
			}
		}

		// 3. Close resources
		fileSystem.close();
	}

III. Operating HDFS with IO Streams

1. File Upload

public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system
		Configuration configuration = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Get an output stream to the target HDFS path
		FSDataOutputStream fos = fileSystem.create(new Path("/user/atguigu/output/dongsi.txt"));

		// 3. Get an input stream from the local file
		FileInputStream fileInputStream = new FileInputStream(new File("f:/dongsi.txt"));

		try {
			// 4. Pipe the streams together
			IOUtils.copyBytes(fileInputStream, fos, configuration);
		} finally {
			// 5. Close resources
			IOUtils.closeStream(fos);
			IOUtils.closeStream(fileInputStream);
		}
	}

2. File Download

public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system
		Configuration configuration = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Get an input stream from the HDFS file
		FSDataInputStream fis = fileSystem.open(new Path("/user/atguigu/bajie.txt"));
		// 3. Create an output stream to the local file
		FileOutputStream fileOutputStream = new FileOutputStream(new File("F:/bajie.txt"));
		// 4. Pipe the streams together
		try {
			IOUtils.copyBytes(fis, fileOutputStream, configuration);
		} finally {
			// 5. Close resources
			IOUtils.closeStream(fileOutputStream);
			IOUtils.closeStream(fis);
		}
	}

3. Seek-Based File Reading

1. Download the first block of a large file

// Download the first block of the large file

	@Test
	public void getFileFromHDFSSeek1() throws IOException, InterruptedException, URISyntaxException{
		// 1. Obtain the file system
		Configuration configuration = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Get an input stream from the HDFS file
		FSDataInputStream fis = fileSystem.open(new Path("/user/atguigu/input/hadoop-2.7.2.tar.gz"));
		// 3. Create an output stream for the first part
		FileOutputStream fos = new FileOutputStream(new File("F:/hadoop-2.7.2.tar.gz.part1"));

		// 4. Copy exactly the first block (128 MB); write only the bytes
		// actually read, since read() may return fewer than the buffer size
		try {
			byte[] buf = new byte[1024];
			long remaining = 1024L * 1024 * 128;
			while (remaining > 0) {
				int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
				if (len == -1) {
					break;
				}
				fos.write(buf, 0, len);
				remaining -= len;
			}
		} finally {
			// 5. Close resources
			IOUtils.closeStream(fis);
			IOUtils.closeStream(fos);
		}
	}
	

2. Download the second block of the large file

// Download the second block
	@Test
	public void getFileFromHDFSSeek2() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system
		Configuration configuration = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");

		// 2. Get an input stream from the HDFS file
		FSDataInputStream fis = fileSystem.open(new Path("/user/atguigu/input/hadoop-2.7.2.tar.gz"));
		// 3. Create an output stream for the second part
		FileOutputStream fos = new FileOutputStream(new File("F:/hadoop-2.7.2.tar.gz.part2"));

		// 4. Seek past the first block: 128 MB, the default HDFS block size in Hadoop 2.x
		fis.seek(1024 * 1024 * 128);

		try {
			// Copy everything from the seek position to the end of the file
			IOUtils.copyBytes(fis, fos, configuration);
		} finally {
			// 5. Close resources
			IOUtils.closeStream(fis);
			IOUtils.closeStream(fos);
		}

	}

3. Merge the Files
Run the following in a Windows command prompt; it appends part2 to part1, after which part1 holds the complete archive and can be renamed back to hadoop-2.7.2.tar.gz:

type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1
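
Alternatively, the same merge can be done in Java; a minimal sketch, assuming the two part files sit in F:/ as produced above (mergeParts is a name chosen for this sketch):

import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.hadoop.io.IOUtils;

public static void mergeParts() throws Exception {
	try (FileOutputStream fos = new FileOutputStream("F:/hadoop-2.7.2.tar.gz");
			FileInputStream part1 = new FileInputStream("F:/hadoop-2.7.2.tar.gz.part1");
			FileInputStream part2 = new FileInputStream("F:/hadoop-2.7.2.tar.gz.part2")) {
		// Concatenate part1 then part2 into the restored archive
		IOUtils.copyBytes(part1, fos, 4096, false);
		IOUtils.copyBytes(part2, fos, 4096, false);
	}
}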

Reprinted from blog.csdn.net/chen7588693/article/details/84849762