ビッグデータの学習(2) - ローカルファイルをHDFSへアップロードする際の重複排除

Redisを利用して重複を排除しながら、ローカルファイルをHDFSへアップロードする

導入:
HDFSはブロックデータを保存する際にMD5チェックサムも一緒に保存し、データの整合性が保たれているかどうかの判定に使用する。
以下のコードは、この特性を模倣して、内容の重複排除を行う小さなアップロード機能を実装したものである。
技術選択

  1. Spring Boot
  2. Redis
  3. Hadoop
  4. JSP
  5. Maven

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.5.7.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.msk</groupId>
    <artifactId>springboot-hadoop</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>springboot-hadoop</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- Spring Boot web support -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Spring Boot test support -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <!-- Only available on the test classpath -->
            <scope>test</scope>
        </dependency>
        <!-- JSP integration for Spring Boot -->
        <dependency>
            <groupId>org.apache.tomcat.embed</groupId>
            <artifactId>tomcat-embed-jasper</artifactId>
        </dependency>
        <!-- Redis support. Declared once, with the version left to spring-boot-starter-parent;
             the previous second declaration pinned 1.5.8.RELEASE, which duplicated this entry
             and disagreed with the 1.5.7.RELEASE parent. -->
        <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-data-redis -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>
        <!-- DigestUtils (MD5 hex digest) -->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.10</version>
        </dependency>

        <!-- Hadoop / HDFS client libraries -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.5.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

application.yml

# Embedded server: the application listens on port 8888 under the /hadoop context path.
server:
  port: 8888
  context-path: /hadoop
spring:
  http:
    # Multipart upload limits: 10MB per file, 100MB per request.
    multipart:
      max-file-size: 10MB
      max-request-size: 100MB
  # Connection settings for the Redis instance used by the application.
  redis:
    host: 192.168.227.100
    port: 7000

JSPページ

<%@ page contentType="text/html;charset=UTF-8" language="java" isELIgnored="false" %>
<%-- Upload page: a single multipart form that posts the chosen file to the upload controller. --%>
<html>
<body>
    <%-- Posts to <context path>/upload/upload.do using multipart/form-data encoding.
         NOTE(review): the controller is mapped at /upload/upload; the ".do" suffix presumably
         relies on Spring MVC suffix-pattern matching - confirm. --%>
    <form action="${pageContext.request.contextPath}/upload/upload.do" method="post" enctype="multipart/form-data">
        <%-- The field name "file" matches the MultipartFile parameter of the controller's upload method. --%>
        <input type="file" name="file" value="点击上传">
        <input type="submit" value="提交">
    </form>
</body>
</html>

Javaコード

package com.msk.controller;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.SetOperations;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * REST controller that uploads files to HDFS and skips content that has already been uploaded.
 *
 * <p>Duplicates are detected by content: the MD5 hex digest of the uploaded bytes is used as a
 * Redis key, and the Redis set stored under that key records every file name that content was
 * submitted under.
 */
@RestController
@RequestMapping("/upload")
public class Upload {
    /** Local directory prefix where an upload is staged before being copied to HDFS. */
    private static final String LOCAL_STAGING_DIR = "E:/";
    /** HDFS directory prefix that receives the uploaded files. */
    private static final String HDFS_TARGET_DIR = "/data/";
    /** Value assigned to {@code fs.defaultFS} when obtaining the file system. */
    private static final String HDFS_DEFAULT_FS = "hdfs://hadoop1.msk.com:8020";
    /** Buffer size, in bytes, used when copying the staged file into HDFS. */
    private static final int COPY_BUFFER_SIZE = 1024;

    @Autowired
    StringRedisTemplate redisTemplate;

    /**
     * Stores the uploaded file in HDFS unless identical content was uploaded before.
     *
     * <p>In both the duplicate and the new-content case the file name is added to the Redis set
     * keyed by the content's MD5 digest.
     *
     * @param file the uploaded multipart file; may be {@code null} when the request has no such part
     * @return a short status message: empty upload, duplicate content, or stored successfully
     * @throws Exception if reading the upload, staging it locally, or writing to HDFS fails
     */
    @RequestMapping(value = "/upload", produces = "application/json; charset=utf-8")
    public String upload(MultipartFile file) throws Exception {
        // Reject requests without a usable file name before doing any hashing or I/O.
        if (file == null) {
            return "空的!";
        }
        String filename = baseName(file.getOriginalFilename());
        if (filename.isEmpty()) {
            return "空的!";
        }

        // Content fingerprint used for de-duplication.
        String md5Hex = DigestUtils.md5Hex(file.getBytes());
        if (isAlreadyUploaded(md5Hex)) {
            // Same content seen before: only remember the additional file name.
            setRedis(md5Hex, filename);
            return "已上传相同内容文件!!!";
        }

        // Stage the upload on the local disk.
        File staged = new File(LOCAL_STAGING_DIR + filename);
        file.transferTo(staged);

        // Copy the staged file into HDFS; try-with-resources closes both streams even when
        // creating the HDFS file or copying fails.
        FileSystem filesystem = getFilesystem();
        try (FileInputStream in = new FileInputStream(staged);
             FSDataOutputStream out = filesystem.create(new Path(HDFS_TARGET_DIR + filename))) {
            IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, false);
        }

        // Record the fingerprint only after the HDFS write completed.
        setRedis(md5Hex, filename);
        return "存好了,下一位";
    }

    /**
     * Reduces a client-supplied file name to its last path segment so it cannot escape the
     * staging or HDFS target directory.
     *
     * @param original the file name as sent by the client; may be {@code null}
     * @return the part after the last {@code /} or {@code \}; an empty string when the input is
     *     {@code null} or the remainder is {@code .} or {@code ..}
     */
    private static String baseName(String original) {
        if (original == null) {
            return "";
        }
        int lastSeparator = Math.max(original.lastIndexOf('/'), original.lastIndexOf('\\'));
        String name = original.substring(lastSeparator + 1);
        if (".".equals(name) || "..".equals(name)) {
            return "";
        }
        return name;
    }

    /**
     * Obtains the FileSystem object for the configured HDFS address.
     *
     * @return the file system returned by {@code FileSystem.get} for {@code fs.defaultFS}
     * @throws IOException if the file system cannot be obtained
     */
    private FileSystem getFilesystem() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_DEFAULT_FS);
        return FileSystem.get(configuration);
    }

    /**
     * Adds a value to the Redis set stored under the given key.
     *
     * @param key the Redis key (the content's MD5 hex digest)
     * @param value the set member to add (the uploaded file name)
     */
    private void setRedis(String key, String value) {
        SetOperations<String, String> set = redisTemplate.opsForSet();
        set.add(key, value);
    }

    /**
     * Duplicate check: tells whether the given digest already exists as a Redis key.
     *
     * @param md5Hex the content's MD5 hex digest
     * @return {@code true} only when Redis reports that the key exists
     */
    private boolean isAlreadyUploaded(String md5Hex) {
        // hasKey returns a boxed Boolean; comparing against TRUE avoids unboxing a null.
        return Boolean.TRUE.equals(redisTemplate.hasKey(md5Hex));
    }
}
公開済みのオリジナル記事19件 · いいね8件 · 閲覧数4553

おすすめ

転載: blog.csdn.net/M283592338/article/details/90941943