Traverse an HDFS directory tree and print file sizes

The Java program below connects to an HDFS cluster, walks the top two directory levels, and for each entry prints its logical size (getLength()) and the raw space consumed across replicas (getSpaceConsumed()).

package com.whh.bigdata.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Created by whh on 2018/2/7.
 */
public class HdfsFile {
    /**
     * Appends a line of text to a local file, creating the file if it does not exist.
     */
    public static void appendToFile(String sDestFile, String sContent) {
        File destFile = new File(sDestFile);
        BufferedWriter out = null;
        if (!destFile.exists()) {
            try {
                destFile.createNewFile();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sDestFile, true)));
            out.write(sContent);
            out.newLine();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
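    // Alternative sketch: since Java 7, try-with-resources can replace the manual
    // finally/close bookkeeping above (shown commented out so the class still
    // compiles with a single appendToFile definition):
    //
    //     public static void appendToFile(String sDestFile, String sContent) {
    //         try (BufferedWriter out = new BufferedWriter(
    //                 new OutputStreamWriter(new FileOutputStream(sDestFile, true)))) {
    //             out.write(sContent);
    //             out.newLine();
    //         } catch (IOException e) {
    //             e.printStackTrace();
    //         }
    //     }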
    public static void main(String[] args) {
        Configuration conf = new Configuration(); // Hadoop configuration
        FileSystem hdfs = null; // HDFS handle
        Path path = new Path("/"); // start at the root directory


        // List the entries under each first-level directory, together with their sizes.
        try {
            hdfs = FileSystem.get(new URI("hdfs://BIGDATA-HADOOP-02.whh.net:8022"), conf, "bigdata");
            FileStatus[] listStatus = hdfs.listStatus(path);

            Path[] listPath = FileUtil.stat2Paths(listStatus); // first-level entries under the root
            for (Path p : listPath) {
                FileStatus[] pStatus = hdfs.listStatus(p); // entries under each first-level directory

                Path[] listP = FileUtil.stat2Paths(pStatus);
                for (Path p2 : listP) {
                    ContentSummary summary = hdfs.getContentSummary(p2); // fetch the summary once
                    String str = p.toString() + "\t" + p2.toString() + "\t"
                            + summary.getLength() + "\t"
                            + summary.getSpaceConsumed();
                    // Strip the scheme/authority prefix so only plain HDFS paths remain.
                    String strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
                    System.out.println(strre);

                    // appendToFile("F:/work/HDFSfilePro.txt", str);
                }
//                String fileNamePath = p.toString(); // file name including its full path
//
//                // getSpaceConsumed() reflects the cluster's replication, e.g. 3 GB here
//                System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getSpaceConsumed());
//                // getLength() is the actual logical size, e.g. 1 GB here
//                System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getLength());
            }

        } catch (IOException | InterruptedException | URISyntaxException e) {
            e.printStackTrace();
        }
    }

}
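A note on the two sizes: getLength() returns the logical number of bytes under a path, while getSpaceConsumed() returns the raw HDFS usage, roughly length times the replication factor. A minimal standalone check (the /user/bigdata path is only an example; it assumes the same hdfs handle as above):

ContentSummary cs = hdfs.getContentSummary(new Path("/user/bigdata")); // example path
System.out.println("logical bytes: " + cs.getLength());        // size before replication
System.out.println("raw bytes: " + cs.getSpaceConsumed());     // ~ size x replication factor

With the default replication factor of 3, a 1 GB directory reports about 3 GB from getSpaceConsumed(), matching the commented-out example output in the listing.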

The original listing has a small bug: when it reaches a directory the user has no permission to read, getContentSummary throws an AccessControlException and the for loop aborts. Wrapping the loop body in a try...catch fixes this (it also needs an extra import, org.apache.hadoop.security.AccessControlException). The corrected loop:

for (Path p : listPath) {
    FileStatus[] pStatus = hdfs.listStatus(p);

    Path[] listP = FileUtil.stat2Paths(pStatus);
    for (Path p2 : listP) {
        try {
            ContentSummary summary = hdfs.getContentSummary(p2); // fetch the summary once
            String str = p.toString() + "\t" + p2.toString() + "\t"
                    + summary.getLength() + "\t"
                    + summary.getSpaceConsumed();
            String strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
            System.out.println(strre);

            // Append only on success, so a failed entry cannot re-write the previous line.
            appendToFile("F:/work/HDFSfilePro-2018-02-08.txt", strre);
        } catch (AccessControlException e) {
            // No permission on this entry: log its path and keep going.
            System.out.println(p.toString() + "\t" + p2.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
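The two nested loops only reach two directory levels. For the whole tree, a recursive walk is more convenient. Below is a sketch of such a variant (the walk method name and the "PERMISSION DENIED" marker are my own choices; it reuses the hdfs handle and the imports from the listing above):

// Recursively print <path, length, spaceConsumed> for every entry under dir,
// skipping directories the user cannot read instead of aborting.
private static void walk(FileSystem hdfs, Path dir) {
    FileStatus[] entries;
    try {
        entries = hdfs.listStatus(dir);
    } catch (AccessControlException e) {
        System.out.println(dir + "\tPERMISSION DENIED");
        return;
    } catch (IOException e) {
        e.printStackTrace();
        return;
    }
    for (FileStatus st : entries) {
        try {
            ContentSummary cs = hdfs.getContentSummary(st.getPath());
            // toUri().getPath() drops the hdfs://host:port prefix, so the
            // string replace() used above is not needed here.
            System.out.println(st.getPath().toUri().getPath() + "\t"
                    + cs.getLength() + "\t" + cs.getSpaceConsumed());
        } catch (AccessControlException e) {
            System.out.println(st.getPath() + "\tPERMISSION DENIED");
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (st.isDirectory()) {
            walk(hdfs, st.getPath()); // descend into subdirectories
        }
    }
}

Calling walk(hdfs, new Path("/")) from main replaces both nested loops.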


Reposted from my.oschina.net/u/3267050/blog/1619793