hadoop hdfs JAVA API 基本文件操作

       本文主要介绍如何使用 Hadoop HDFS Java API 对 HDFS 中的文件进行基本操作,如:创建文件、将文件上传到 HDFS、从 HDFS 下载文件到本地、删除 HDFS 文件等。

       话不多说,上代码:

    

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Utility class with basic HDFS file operations via the Hadoop
 * {@link FileSystem} API: delete, upload from the local file system,
 * download/read to the local file system, and directory listing.
 *
 * <p>Each method loads the cluster configuration from
 * {@code conf/core-site.xml} on the classpath.
 *
 * <p>NOTE(review): class name keeps the original spelling "Hdsf" (sic)
 * to stay compatible with existing callers.
 *
 * @author 
 * @date 2013-4-13
 */
public class HdsfFileUtil {

	/** Utility class; not meant to be instantiated. */
	private HdsfFileUtil() {
	}

	/**
	 * Deletes a file or directory from HDFS. Directories are removed
	 * recursively.
	 *
	 * @param path
	 *            path of the file or directory to delete
	 * @throws IOException
	 *             if communication with HDFS fails
	 */
	public static void deleteHdfsFile(String path) throws IOException {

		// Load the cluster configuration.
		Configuration conf = new Configuration();
		conf.addResource("conf/core-site.xml");

		// Resolve the target path and its owning file system.
		Path deletePath = new Path(path);
		FileSystem hdfs = deletePath.getFileSystem(conf);
		boolean isDeleted = false;

		// Only attempt the delete when the target exists.
		if (hdfs.exists(deletePath)) {
			// Second argument 'true' requests a recursive delete,
			// so non-empty directories are removed as well.
			isDeleted = hdfs.delete(deletePath, true);
		} else {
			System.out.println("文件不存在:删除失败");
		}
		System.out.println("Delete?" + isDeleted);
	}

	/**
	 * Uploads a local file to HDFS, overwriting the target if present.
	 *
	 * @param local
	 *            local source file path
	 * @param hdfs
	 *            HDFS destination path (also used to locate the file system)
	 * @throws IOException
	 *             if the local file cannot be read or the HDFS write fails
	 */
	public static void uploadToHdfs(String local, String hdfs)
			throws IOException {

		// Load the cluster configuration.
		Configuration config = new Configuration();
		config.addResource("conf/core-site.xml");

		// Obtain the HDFS file system for the destination URI.
		FileSystem fs = FileSystem.get(URI.create(hdfs), config);

		// Open the local source; guard it so it is closed even when
		// fs.create() throws before the copy starts.
		FileInputStream fis = new FileInputStream(new File(local));
		try {
			OutputStream os = fs.create(new Path(hdfs));
			// close=true: copyBytes closes both streams itself,
			// including on error — no extra close() calls needed.
			IOUtils.copyBytes(fis, os, 4096, true);
		} finally {
			// Idempotent re-close; covers the fs.create() failure path.
			fis.close();
		}

		System.out.println("拷贝完成...");
	}

	/**
	 * Downloads an HDFS file to the local file system, echoing each chunk
	 * to stdout as it is copied.
	 *
	 * @param fileName
	 *            HDFS source file path
	 * @param dest
	 *            local destination file path
	 * @throws IOException
	 *             if the HDFS read or the local write fails
	 */
	public static void readFromHdfs(String fileName, String dest)
			throws IOException {
		// Load the cluster configuration.
		Configuration conf = new Configuration();
		conf.addResource("conf/core-site.xml");

		// Obtain the HDFS file system for the source URI.
		FileSystem fs = FileSystem.get(URI.create(fileName), conf);

		// Open the HDFS input stream.
		FSDataInputStream hdfsInStream = fs.open(new Path(fileName));
		OutputStream out = null;
		try {
			// Local output stream.
			out = new FileOutputStream(dest);

			byte[] ioBuffer = new byte[1024];

			// Chunked copy (read() returns -1 at end of stream).
			int readLen = hdfsInStream.read(ioBuffer);
			while (-1 != readLen) {
				out.write(ioBuffer, 0, readLen);
				// BUG FIX: echo only the bytes read in this pass; the
				// original printed the whole buffer, including stale
				// bytes from previous iterations on the final chunk.
				System.out.println(new String(ioBuffer, 0, readLen));
				readLen = hdfsInStream.read(ioBuffer);
			}
		} finally {
			// Close everything even when the copy fails mid-stream.
			if (out != null) {
				out.close();
			}
			hdfsInStream.close();
			fs.close();
		}
	}

	/**
	 * Lists the entries of an HDFS directory, printing each entry's name
	 * and length to stdout.
	 *
	 * @param path
	 *            HDFS directory path
	 * @throws IOException
	 *             if the listing fails
	 */
	public static void getDirectoryFromHdfs(String path) throws IOException {

		// Load the cluster configuration.
		Configuration conf = new Configuration();
		conf.addResource("conf/core-site.xml");

		// Obtain the HDFS file system for the given path.
		FileSystem fs = FileSystem.get(URI.create(path), conf);
		try {
			// Fetch the status of every entry under the path.
			FileStatus[] fileList = fs.listStatus(new Path(path));

			// Print name and size for each entry.
			for (FileStatus status : fileList) {
				System.out.println("name:" + status.getPath().getName()
						+ "\t\tsize:" + status.getLen());
			}
		} finally {
			// Release the file system even if the listing throws.
			fs.close();
		}
	}

}

 

猜你喜欢

转载自suxain.iteye.com/blog/1846898