Hadoop基础-通过IO流操作HDFS

　　　　　　　　　　　　　　　　　　Hadoop基础-通过IO流操作HDFS

　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　作者：尹正杰

一.上传文件

 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
 4 EMAIL:[email protected]
 5 */
 6 package hdfs.yinzhengjie.org.cn;
 7 
 8 import org.apache.hadoop.conf.Configuration;
 9 import org.apache.hadoop.fs.*;
10 import org.apache.hadoop.io.IOUtils;
11 
12 import java.io.File;
13 import java.io.FileInputStream;
14 import java.net.URI;
15 
16 public class HdfsClient {
17 
18     public static void main(String[] args) throws Exception {
19         putFileToHDFS();
20     }
21 
22     public static void putFileToHDFS() throws Exception{
23         //配合Hadoop的环境变量，如果没有配置可能会抛异常：“ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path”，还有一件事就是你的HADOOP_HOME的bin目录下必须得有winutils.exe
24          System.setProperty("hadoop.home.dir", "D:\\yinzhengjie\\softwares\\hadoop-2.7.3");
25          //创建配置信息对象
26         Configuration conf = new Configuration();
27         //获取文件系统，需要传入hdfs的链接地址，conf对象，以及操作的用户名
28        FileSystem fs = FileSystem.get(new URI("hdfs://node105.yinzhengjie.org.cn:8020"),conf,"hdfs");
29         //创建输入流
30         FileInputStream inStream = new FileInputStream(new File("D:\\yinzhengjie\\data\\yinzhengjie.txt"));
31         //获取输出路径
32         String putFileName = "hdfs://node105.yinzhengjie.org.cn:8020/user/yinzhengjie/2018-11-04.txt";
33         Path writePath = new Path(putFileName);
34         //创建输出流
35         FSDataOutputStream outStream = fs.create(writePath);
36         //流对接
37         try{
38             IOUtils.copyBytes(inStream, outStream, 4096, false);
39         }catch(Exception e){
40             e.printStackTrace();
41         }finally{
42             //关闭流，释放资源
43             IOUtils.closeStream(inStream);
44             IOUtils.closeStream(outStream);
45         }
46     }
47 }

二.下载文件

 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
 4 EMAIL:[email protected]
 5 */
 6 package hdfs.yinzhengjie.org.cn;
 7 
 8 import org.apache.hadoop.conf.Configuration;
 9 import org.apache.hadoop.fs.FSDataInputStream;
10 import org.apache.hadoop.fs.FileSystem;
11 import org.apache.hadoop.fs.Path;
12 import org.apache.hadoop.io.IOUtils;
13 
14 import java.io.File;
15 import java.io.FileOutputStream;
16 import java.net.URI;
17 
18 public class HdfsClient {
19 
20     public static void main(String[] args) throws Exception {
21         getFileToHDFS();
22     }
23 
24     public static void getFileToHDFS() throws Exception{
25         //配合Hadoop的环境变量，如果没有配置可能会抛异常：“ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path”，还有一件事就是你的HADOOP_HOME的bin目录下必须得有winutils.exe
26          System.setProperty("hadoop.home.dir", "D:\\yinzhengjie\\softwares\\hadoop-2.7.3");
27          //创建配置信息对象
28         Configuration conf = new Configuration();
29         //获取文件系统，需要传入hdfs的链接地址，conf对象，以及操作的用户名
30        FileSystem fs = FileSystem.get(new URI("hdfs://node105.yinzhengjie.org.cn:8020"),conf,"hdfs");
31         //获取读取文件路径
32         String filename = "hdfs://node105.yinzhengjie.org.cn:8020/user/yinzhengjie/2018-11-04.txt";
33         //创建读取path
34         Path readPath = new Path(filename);
35         //创建建输入流
36         FSDataInputStream inStream = fs.open(readPath);
37         //创建输出流,指定本地路径
38         FileOutputStream fos = new FileOutputStream(new File("D:\\yinzhengjie\\data\\output.txt"));
39 
40         try{
41             //流对接输出到控制台
42 //            IOUtils.copyBytes(inStream, System.out, 4096, false);
43             //流对考输出到本地磁盘
44             IOUtils.copyBytes(inStream,fos, conf);
45 
46         }catch(Exception e){
47             e.printStackTrace();
48         }finally{
49             //释放资源
50             IOUtils.closeStream(inStream);
51             IOUtils.closeStream(fos);
52             fos.close();
53         }
54     }
55 }

三.定位读取文件

 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
 4 EMAIL:[email protected]
 5 */
 6 package hdfs.yinzhengjie.org.cn;
 7 
 8 import org.apache.hadoop.conf.Configuration;
 9 import org.apache.hadoop.fs.FSDataInputStream;
10 import org.apache.hadoop.fs.FileSystem;
11 import org.apache.hadoop.fs.Path;
12 import org.apache.hadoop.io.IOUtils;
13 
14 import java.io.FileOutputStream;
15 import java.net.URI;
16 
17 public class HdfsClient {
18 
19     public static void main(String[] args) throws Exception {
20         readFileSeek1();
21         readFileSeek2();
22     }
23 
24     //读取第一个块大小，128M
25     public static void readFileSeek1() throws Exception{
26         //配合Hadoop的环境变量，如果没有配置可能会抛异常：“ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path”，还有一件事就是你的HADOOP_HOME的bin目录下必须得有winutils.exe
27          System.setProperty("hadoop.home.dir", "D:\\yinzhengjie\\softwares\\hadoop-2.7.3");
28          //创建配置信息对象
29         Configuration conf = new Configuration();
30         //获取文件系统，需要传入hdfs的链接地址，conf对象，以及操作的用户名
31        FileSystem fs = FileSystem.get(new URI("hdfs://node105.yinzhengjie.org.cn:8020"),conf,"hdfs");
32         //获取输入流路径
33         Path path = new Path("hdfs://node105.yinzhengjie.org.cn:8020//yinzhengjie/cloudera-manager.tar.gz");
34         //打开输入流
35         FSDataInputStream fis = fs.open(path);
36         //创建输出流
37         FileOutputStream fos = new FileOutputStream("D:\\yinzhengjie\\data\\cloudera-manager-1.tar.gz");
38         //定义缓冲区大小是1024
39         byte[] buf = new byte[1024];
40         //读取一个128M的文件
41         for (int i = 0; i < 128 * 1024; i++) {
42             //将数据从输入流读出然后在写入输出流。
43             fis.read(buf);
44             fos.write(buf);
45         }
46         //关闭流
47         IOUtils.closeStream(fis);
48         IOUtils.closeStream(fos);
49     }
50 
51     //将128M后续的大小都读取出来
52     public static void readFileSeek2() throws Exception{
53         //配合Hadoop的环境变量，如果没有配置可能会抛异常：“ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path”，还有一件事就是你的HADOOP_HOME的bin目录下必须得有winutils.exe
54         System.setProperty("hadoop.home.dir", "D:\\yinzhengjie\\softwares\\hadoop-2.7.3");
55         //创建配置信息对象
56         Configuration conf = new Configuration();
57         //获取文件系统，需要传入hdfs的链接地址，conf对象，以及操作的用户名
58         FileSystem fs = FileSystem.get(new URI("hdfs://node105.yinzhengjie.org.cn:8020"),conf,"hdfs");
59         //获取输入流路径
60         Path path = new Path("hdfs://node105.yinzhengjie.org.cn:8020//yinzhengjie/cloudera-manager.tar.gz");
61         //打开输入流
62         FSDataInputStream fis = fs.open(path);
63         //创建输出流
64         FileOutputStream fos = new FileOutputStream("D:\\yinzhengjie\\data\\cloudera-manager-2.tar.gz");
65         //定位偏移量（第二块的首位,1024*1024就是1M，也就是说他的起始位置是从128M开始的！）
66         fis.seek(1024 * 1024 * 128);
67         //流对接
68         IOUtils.copyBytes(fis, fos, 1024);
69         //关闭资源
70         IOUtils.closeStream(fis);
71         IOUtils.closeStream(fos);
72     }
73 }
74 
75 
76 /**
77  *    将数据写入到本地后，可以使用cmd窗口进入到“D:\yinzhengjie\data”目录中，并执行：D:\yinzhengjie\data>type cloudera-manager-2.tar.gz >> cloudera-manager-1.tar.gz
78  * 合并之后即可得到完整的文件，并且可以正常解压、查看其中的内容。
79  */

Hadoop基础-通过IO流操作HDFS

猜你喜欢