package com.whh.bigdata.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
/**
* Created by whh on 2018/2/7.
*/
public class HdfsFile {
/**
* 往local文件里写数据
*/
public static void appendToFile(String sDestFile, String sContent) {
// String sContent = "I love Ysm";
// String sDestFile = "F:/work/logParse/autoCreateHql/myWrite.txt";
File destFile = new File(sDestFile);
BufferedWriter out = null;
if (!destFile.exists()) {
try {
destFile.createNewFile();
} catch (IOException e) {
e.printStackTrace();
}
}
try {
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sDestFile, true)));
out.write(sContent);
out.newLine();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (out != null) {
out.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static void main(String[] args) {
Configuration conf = new Configuration();//定义配置文件
FileSystem hdfs = null;//文件系统变量
StringBuffer sb = new StringBuffer();
Path path=new Path("/");
//获取某目录下的文件列表+文件大小
try {
hdfs = FileSystem.get(new URI("hdfs://BIGDATA-HADOOP-02.whh.net:8022"), conf, "bigdata");
FileStatus[] listStatus = hdfs.listStatus(path);
Path[] listPath = FileUtil.stat2Paths(listStatus); //列出一级目录下的文件列表
for (Path p : listPath) {//
FileStatus[] pStatus = hdfs.listStatus(p); //
Path[] listP = FileUtil.stat2Paths(pStatus);
for (Path p2 : listP) {
String str=p.toString()+"\t"+p2.toString()+"\t"
+hdfs.getContentSummary(p2).getLength()+ "\t"
+hdfs.getContentSummary(p2).getSpaceConsumed();
String strre= str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022","");
System.out.println(strre);
// appendToFile("F:/work/HDFSfilePro.txt",str);
}
// String fileNamePath = p.toString();//获得带路径的文件名字符串
//
// //会根据集群的配置输出,例如我这里输出3G
// System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getSpaceConsumed());
// // 显示实际的输出,例如这里显示 1G
// System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getLength());
}
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
}
}
以上代码有一个小 bug:当遍历到当前用户没有读权限的目录时,hdfs.listStatus / hdfs.getContentSummary 会抛出 org.apache.hadoop.security.AccessControlException,导致整个 for 循环提前退出,后面的目录都不会被统计。需要在循环体内对单个路径 try...catch 该异常并继续遍历,修改如下:
// Walk the second level of the tree; permission failures on a single path are
// reported and skipped so the scan always completes.
for (Path p : listPath) {
    FileStatus[] pStatus = hdfs.listStatus(p);
    Path[] listP = FileUtil.stat2Paths(pStatus);
    for (Path p2 : listP) {
        try {
            // Fetch the summary once (one NameNode RPC) instead of calling
            // getContentSummary twice per path.
            org.apache.hadoop.fs.ContentSummary cs = hdfs.getContentSummary(p2);
            String str = p.toString() + "\t" + p2.toString() + "\t"
                    + cs.getLength() + "\t"
                    + cs.getSpaceConsumed();
            String strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
            System.out.println(strre);
            // Append only on success: in the earlier draft the append sat
            // outside the try, so a denied path re-appended the PREVIOUS
            // iteration's line (stale strre) to the output file.
            appendToFile("F:/work/HDFSfilePro-2018-02-08.txt", strre);
        } catch (org.apache.hadoop.security.AccessControlException e) {
            // Must be the Hadoop AccessControlException (an IOException
            // subclass) for this catch order to compile; the java.security
            // one would not catch HDFS permission errors at all.
            // Was printing p twice; print the entry that actually failed.
            System.out.println(p.toString() + "\t" + p2.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}