1. Cree un nuevo proyecto maven y agregue referencias hadoop-common y hadoop-hdfs en pom.xml.
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.0</version>
</dependency>
2. Copie los archivos de configuración de Hadoop core-site.xml, hdfs-site.xml, log4j.properties implementados en centos7 en el directorio de recursos del proyecto.
3. Ejemplo de lectura de archivos en hdfs
package com.che.hdfs;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* hdfs 读文件
*
*/
/**
 * Reads a file from HDFS and copies its contents to standard output.
 *
 * <p>Connects to the HDFS namenode configured via {@code fs.defaultFS},
 * checks that the target file exists, then streams it to stdout.
 */
public class FileRead {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // HDFS entry point. The key is case-sensitive: "fs.defaultFS".
            // The original used "fs.defaultFs", which Hadoop silently ignores
            // (it only worked because core-site.xml was on the classpath).
            // Must match fs.defaultFS in core-site.xml.
            conf.set("fs.defaultFS", "hdfs://172.20.10.4:9000");
            FileSystem fs = FileSystem.get(conf);
            // Path of the file to read from the HDFS file system.
            String filePath = "/user/che/1021001/test.txt";
            Path targetFile = new Path(filePath);
            if (!fs.exists(targetFile)) {
                System.out.println("hdfs文件:" + filePath + "不存在");
                // Bail out: fs.open() below would throw FileNotFoundException.
                return;
            }
            // try-with-resources guarantees the HDFS stream is closed
            // even if the copy loop throws.
            try (FSDataInputStream in = fs.open(targetFile)) {
                OutputStream out = System.out;
                byte[] buffer = new byte[256];
                int bytesRead;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
                // Flush instead of close: closing System.out would break
                // any later writes to stdout in the same JVM.
                out.flush();
            }
        } catch (IOException e) {
            // FileSystem.get / exists / open / read all throw IOException.
            e.printStackTrace();
        }
    }
}
4. Ejemplo de escritura de archivos en hdfs
package com.che.hdfs;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* hdfs 写文件
*/
/**
 * Copies a local file into the HDFS file system.
 *
 * <p>Connects to the HDFS namenode configured via {@code fs.defaultFS},
 * reads a local file, and writes its bytes to a new HDFS file.
 */
public class FileWrite {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // HDFS entry point. The key is case-sensitive: "fs.defaultFS".
            // The original used "fs.defaultFs", which Hadoop silently ignores
            // (it only worked because core-site.xml was on the classpath).
            // Must match fs.defaultFS in core-site.xml.
            conf.set("fs.defaultFS", "hdfs://172.20.10.4:9000");
            FileSystem fs = FileSystem.get(conf);
            // Local source file to read.
            String inFilePath = "/Users/diguoliang/Desktop/test01.txt";
            // Destination path inside the HDFS file system.
            String outFilePath = "/user/che/1021001/test1.txt";
            File inFile = new File(inFilePath);
            if (!inFile.exists()) {
                System.out.println("要读取的文件:" + inFilePath + "不存在。");
                // Bail out: new FileInputStream(...) below would throw
                // FileNotFoundException.
                return;
            }
            // try-with-resources closes both streams in all cases,
            // including when the copy loop throws.
            try (InputStream in = new BufferedInputStream(new FileInputStream(inFile));
                    FSDataOutputStream out = fs.create(new Path(outFilePath))) {
                byte[] buffer = new byte[256];
                int bytesRead;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
            } catch (IOException e) {
                System.out.println("写文件异常:" + e.getMessage());
            }
        } catch (IOException e) {
            // FileSystem.get / create may fail before the copy starts.
            e.printStackTrace();
        }
    }
}
4.1 El error de permiso del archivo de escritura HDFS es el siguiente:
org.apache.hadoop.security.AccessControlException: org.apache.hadoop.security.AccessControlException: Permission denied: user=che, access=WRITE, inode="hadoop":hadoop:supergroup:rwxr-xr-x
Una solución (recomendable solo en entornos de desarrollo, ya que desactiva por completo la comprobación de permisos de HDFS) es agregar la siguiente configuración al archivo hdfs-site.xml de hadoop y luego reiniciar hdfs; en versiones recientes de Hadoop la propiedad se llama dfs.permissions.enabled.
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>