Reading from and writing to HDFS with the Hadoop Java API, in a Maven project

1. Read/write source code

Pay close attention to the commented lines!

/**
 * App.java
 */

package com.jiecxy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

/**
 * Created by Administrator on 2018/5/23.
 */
public class App {
    public static void main( String[] args )
    {
        try {
            Configuration conf = new Configuration();

            // Without this setting you get: java.io.IOException: No FileSystem for scheme: hdfs
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

            String filePath = "hdfs://192.168.47.140:8020/song/hello.txt";
            Path path = new Path(filePath);

            // The URI must be passed in here, otherwise: java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
            // the third argument is the remote user to connect as
            FileSystem fs = FileSystem.get(new URI(filePath), conf, "hadoop3");

            System.out.println( "READING ============================" );
            FSDataInputStream is = fs.open(path);
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            // this example reads only the first line
            String content = br.readLine();
            System.out.println(content);
            br.close();

            System.out.println("WRITING ============================");
            byte[] buff = "this is helloworld from java api!\n".getBytes();
            FSDataOutputStream os = fs.create(path);
            os.write(buff, 0, buff.length);
            os.close();
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Run result:

(screenshot of the program's console output)
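
As a side note, the streams and the FileSystem handle can also be managed with try-with-resources, so they are closed even if an exception is thrown. Below is a minimal sketch of the read path only, under the same assumptions as App.java (NameNode address, file path, and user name "hadoop3"):

package com.jiecxy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

public class ReadOnce {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        String filePath = "hdfs://192.168.47.140:8020/song/hello.txt";
        // try-with-resources closes the reader (and its underlying stream)
        // and then the FileSystem, in reverse declaration order
        try (FileSystem fs = FileSystem.get(new URI(filePath), conf, "hadoop3");
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(fs.open(new Path(filePath))))) {
            System.out.println(br.readLine());
        }
    }
}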

2. pom.xml configuration

Note:
    1. Package with the shade plugin to avoid packaging failures.
    2. For the dependencies you can reference only `hadoop-client`, or reference `hadoop-common` and `hadoop-hdfs` together (a sketch follows the pom below).

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.jiecxy</groupId>
  <artifactId>HDFSTest</artifactId>
  <version>1</version>
  <packaging>jar</packaging>

  <name>HDFSTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>2.8.1</version>
      </dependency>
  </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <executions>
                    <!-- Run shade goal on package phase -->
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <!-- Do not copy the signatures in the META-INF folder.
                                    Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>

                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.jiecxy.App</mainClass>
                                </transformer>
                            </transformers>

                            <createDependencyReducedPom>false</createDependencyReducedPom>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>

        </plugins>
    </build>
</project>
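
For note 2 above, here is a sketch of the alternative dependency block; version 2.8.1 is assumed to match the `hadoop-client` version used in the pom:

  <dependencies>
      <!-- Alternative to hadoop-client: reference common and hdfs separately -->
      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <version>2.8.1</version>
      </dependency>
      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>2.8.1</version>
      </dependency>
  </dependencies>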

3. Possible problems

3.1 java.lang.IllegalArgumentException: Wrong FS

java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:666)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:86)
    at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:630)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:861)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:625)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:435)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:146)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:786)
    at com.jiecxy.App.main(App.java:25)

Fix:

FileSystem fs = FileSystem.get(conf);

change it to:

FileSystem fs = FileSystem.get(new URI(filePath), conf);

FileSystem.get(conf) returns the filesystem named by fs.defaultFS, which defaults to file:/// when no core-site.xml is on the classpath, so the local filesystem rejects the hdfs:// path; passing the URI selects the matching filesystem explicitly.

3.2 java.io.IOException: No FileSystem for scheme: hdfs

java.io.IOException: No FileSystem for scheme: hdfs
    at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2798)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2809)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:100)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2848)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2830)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389)
    at com.jiecxy.App.main(App.java:24)

Fix:
Set the HDFS implementation class explicitly (if you referenced only hadoop-common, you must also add the hadoop-hdfs dependency, otherwise DistributedFileSystem cannot be found):

conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
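
This error usually means the scheme-to-class mapping got lost: shading merges several jars, and their META-INF/services/org.apache.hadoop.fs.FileSystem files can overwrite one another. As an alternative to hard-coding fs.hdfs.impl, the shade plugin can merge those service files; a sketch of the extra transformer, placed beside the ManifestResourceTransformer in the pom above:

    <!-- Merge META-INF/services files instead of letting them overwrite each other -->
    <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>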

Reposted from blog.csdn.net/weixin_34342578/article/details/90779710