Flink写出数据到Elasticsearch(带用户名密码)

前言

记录一下Flink带用户名密码写出到Elasticsearch的踩坑过程及解决方案。

版本:Flink1.13,Elasticsearch7.17.4

正文

maven依赖如下:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module for the Flink -> Elasticsearch job.
     Flink 1.13.0 with Scala-2.12 builds of the connectors;
     Elasticsearch 7 connector carries the REST high-level client transitively. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>poas</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>core</artifactId>

    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.13.0</flink.version>
    </properties>

    <dependencies>
        <!-- Core Flink APIs and local-execution client. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Table/SQL planner; NOTE(review): it shades commons-codec under
             org.apache.flink.calcite.shaded, which is related to the
             NoSuchMethodError discussed in the article below. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Connectors: JDBC, Kafka (source), Elasticsearch 7 (sink). -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch7_2.12</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- JSON parsing, boilerplate reduction, bean copying. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.24</version>
        </dependency>
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.4</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <!-- NOTE(review): plugin has no explicit <version>; Maven will warn
                 and builds may not be reproducible — consider pinning one. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

Flink写出数据到Elasticsearch的代码如下:

package com.poas.task;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.poas.bean.Event;
import com.poas.bean.OriginalEvent;
import com.poas.constants.ESConstant;
import com.poas.utils.FlinkUtil;
import com.poas.utils.KafkaUtil;
import com.poas.utils.NLPUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch.util.RetryRejectedExecutionFailureHandler;
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.flink.util.Collector;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.poas.constants.KafkaConstant.EVENT;
@Slf4j
public class EventFlink {

    /**
     * Streaming job entry point: reads raw event JSON from Kafka, enriches
     * each event with NLP keyword segments, and writes the result to the
     * "poas_event" Elasticsearch index using basic-auth credentials.
     *
     * @param args unused
     * @throws Exception if the Flink job fails to start or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = FlinkUtil.getEnvironment();

        DataStreamSource<String> kafkaDS = env.addSource(KafkaUtil.getFlinkKafkaConsumer(EVENT, "default"));

        // Parse each Kafka record into an OriginalEvent. Malformed JSON is
        // logged with its cause and dropped instead of failing the job.
        SingleOutputStreamOperator<OriginalEvent> originalEventStream = kafkaDS.flatMap(new FlatMapFunction<String, OriginalEvent>() {
            @Override
            public void flatMap(String value, Collector<OriginalEvent> out) {
                try {
                    JSONObject object = JSON.parseObject(value);
                    out.collect(object.toJavaObject(OriginalEvent.class));
                } catch (Exception e) {
                    // Keep the exception in the log so dirty records can be
                    // diagnosed; the original code silently discarded it.
                    log.warn("过滤脏数据", e);
                }
            }
        });

        // Convert OriginalEvent -> Event and attach NLP keyword segments.
        SingleOutputStreamOperator<Event> eventStream = originalEventStream.map(new MapFunction<OriginalEvent, Event>() {
            @Override
            public Event map(OriginalEvent originalEvent) throws Exception {
                Event event = new Event();
                // Commons BeanUtils argument order is (dest, orig).
                BeanUtils.copyProperties(event, originalEvent);
                List<String> segmentList = NLPUtil.getEventSegmentList(event.getContent());
                event.setKeywords(segmentList);
                // Route debug output through the logger instead of System.out.
                log.debug("{}", event);
                return event;
            }
        });

        // REST client factory that attaches username/password (see ESConstant).
        RestClientFactory restClientFactory = ESConstant.restClientFactory;
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost(ESConstant.HOST, 9200));
        ElasticsearchSink.Builder<Event> esBuilder = new ElasticsearchSink.Builder<>(
                httpHosts,
                new ElasticsearchSinkFunction<Event>() {
                    @Override
                    public void process(Event event, RuntimeContext runtimeContext, RequestIndexer indexer) {
                        indexer.add(createIndexRequest(event));
                    }

                    /** Builds the index request for a single enriched event. */
                    public IndexRequest createIndexRequest(Event element) {
                        Map<String, Object> map = new HashMap<>();
                        map.put("content", element.getContent());
                        map.put("trending", element.getTrending());
                        map.put("url", element.getUrl());
                        map.put("origin", element.getOrigin());
                        map.put("keywords", element.getKeywords().toArray());
                        map.put("crawlTime", element.getCrawlTime());
                        return Requests.indexRequest()
                                .index("poas_event")
                                .source(map);
                    }
                });
        // Attach basic-auth credentials for the secured cluster.
        esBuilder.setRestClientFactory(restClientFactory);
        // Flush every 100 records or every 5 seconds, whichever comes first.
        esBuilder.setBulkFlushMaxActions(100);
        esBuilder.setBulkFlushInterval(5000);
        // Retry failed bulk flushes up to 3 times, exponential backoff,
        // initial delay 2 ms.
        esBuilder.setBulkFlushBackoffRetries(3);
        esBuilder.setBulkFlushBackoffDelay(2);
        esBuilder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.EXPONENTIAL);
        // Re-queue requests rejected because the bulk processor was saturated.
        esBuilder.setFailureHandler(new RetryRejectedExecutionFailureHandler());

        eventStream.addSink(esBuilder.build());

        env.execute();
    }
}

因为我的ES设置了安全认证,所以连接时要带上用户名和密码,这部分内容写在RestClientFactory中:

package com.poas.constants;

import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.client.RestClientBuilder;

public class ESConstant {

    /** Elasticsearch host name. */
    public static final String HOST = "xxxx";

    /** Basic-auth user for the secured cluster. */
    public static final String ES_USERNAME = "elastic";

    /** Basic-auth password. */
    public static final String ES_PASSWORD = "xxxx";

    /**
     * Factory that wires username/password credentials into the REST client
     * used by the Elasticsearch sink. Both {@code RestClientFactory} and
     * {@code HttpClientConfigCallback} are single-abstract-method interfaces,
     * so lambdas replace the anonymous classes.
     */
    public static RestClientFactory restClientFactory = restClientBuilder -> {
        CredentialsProvider provider = new BasicCredentialsProvider();
        provider.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(ES_USERNAME, ES_PASSWORD));
        restClientBuilder.setHttpClientConfigCallback(httpClientBuilder -> {
            httpClientBuilder.disableAuthCaching();
            return httpClientBuilder.setDefaultCredentialsProvider(provider);
        });
    };
}

然而启动Flink后,报错:NoSuchMethodError,截图如下:
(报错截图略:java.lang.NoSuchMethodError,错误信息涉及 org.apache.commons.codec.binary.Base64 中的方法)

看报错的意思是缺少了某个方法,第一反应是缺少依赖,于是搜了一下org.apache.commons.codec.binary.Base64相关的依赖:

<dependency>
  <groupId>commons-codec</groupId>
  <artifactId>commons-codec</artifactId>
  <version>1.10</version>
</dependency>

重新启动Flink,依然报错!上网搜了一下,说可能是jar包冲突,然而我并不知道具体是哪个包冲突了。

经过了各种尝试之后,我又细看了一下报错信息,推断问题出在安全认证的环节,于是查了一下 org.apache.commons.codec.binary.Base64 的具体功能,发现它是用来对字符串做 Base64 编码的(注意:Base64 只是编码,并不是加密)。后来又在某篇博客中得到启发,决定自己对用户名和密码做 Base64 编码、手动构造认证请求头,修改RestClientFactory中的代码如下:

package com.poas.constants;

import org.apache.flink.calcite.shaded.org.apache.commons.codec.binary.Base64;
import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.client.RestClientBuilder;

public class ESConstant {

    /** Elasticsearch host name. */
    public static final String HOST = "xxxx";

    /** Basic-auth user for the secured cluster. */
    public static final String ES_USERNAME = "elastic";

    /** Basic-auth password. */
    public static final String ES_PASSWORD = "xxxx";

    /**
     * Factory that authenticates by attaching a pre-built HTTP Basic
     * "Authorization" header to every request.
     *
     * <p>Uses the JDK's own {@code java.util.Base64} rather than
     * commons-codec or Flink's shaded relocation of it
     * ({@code org.apache.flink.calcite.shaded...Base64}): the shaded class is
     * an internal detail that can disappear between Flink versions, and the
     * JDK encoder cannot collide with a conflicting commons-codec jar on the
     * classpath — the cause of the original NoSuchMethodError.
     */
    public static RestClientFactory restClientFactory = new RestClientFactory() {
        @Override
        public void configureRestClientBuilder(RestClientBuilder restClientBuilder) {
            // RFC 7617: the credential is "user:password", Base64-encoded.
            // Encode with an explicit charset instead of the platform default.
            byte[] raw = (ES_USERNAME + ":" + ES_PASSWORD)
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8);
            String auth = java.util.Base64.getEncoder().encodeToString(raw);
            restClientBuilder.setDefaultHeaders(new BasicHeader[]{
                    new BasicHeader("Authorization", "Basic " + auth)});
        }
    };
}

再次启动Flink程序,终于成功!!

猜你喜欢

转载自blog.csdn.net/wzc3614/article/details/128766534