Flink writes data to Elasticsearch (with username and password)

Preface

This post records, step by step, the process of writing from Flink to an Elasticsearch cluster that requires a username and password, and the fix that finally worked.

Versions: Flink 1.13, Elasticsearch 7.17.4

Main text

The Maven dependencies are as follows:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>poas</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>core</artifactId>

    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.13.0</flink.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch7_2.12</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.24</version>
        </dependency>
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.4</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

The code for Flink to write data to Elasticsearch is as follows:

package com.poas.task;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.poas.bean.Event;
import com.poas.bean.OriginalEvent;
import com.poas.constants.ESConstant;
import com.poas.utils.FlinkUtil;
import com.poas.utils.KafkaUtil;
import com.poas.utils.NLPUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch.util.RetryRejectedExecutionFailureHandler;
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.flink.util.Collector;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.poas.constants.KafkaConstant.EVENT;
@Slf4j
public class EventFlink {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = FlinkUtil.getEnvironment();

        DataStreamSource<String> kafkaDS = env.addSource(KafkaUtil.getFlinkKafkaConsumer(EVENT, "default"));

        SingleOutputStreamOperator<OriginalEvent> originalEventStream = kafkaDS.flatMap(new FlatMapFunction<String, OriginalEvent>() {
            @Override
            public void flatMap(String value, Collector<OriginalEvent> out) throws Exception {
                try {
                    JSONObject object = JSON.parseObject(value);
                    out.collect(object.toJavaObject(OriginalEvent.class));
                } catch (Exception e) {
                    log.info("Filtering out dirty data");
                }
            }
        });

        SingleOutputStreamOperator<Event> eventStream = originalEventStream.map(new MapFunction<OriginalEvent, Event>() {
            @Override
            public Event map(OriginalEvent originalEvent) throws Exception {
                Event event = new Event();
                BeanUtils.copyProperties(event, originalEvent);
                List<String> segmentList = NLPUtil.getEventSegmentList(event.getContent());
                event.setKeywords(segmentList);
                System.out.println(event);
                return event;
            }
        });

        RestClientFactory restClientFactory = ESConstant.restClientFactory;
        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost(ESConstant.HOST, 9200));
        ElasticsearchSink.Builder<Event> esBuilder = new ElasticsearchSink.Builder<>(
                httpHosts,
                new ElasticsearchSinkFunction<Event>() {
                    @Override
                    public void process(Event event, RuntimeContext runtimeContext, RequestIndexer indexer) {
                        indexer.add(createIndexRequest(event));
                    }

                    public IndexRequest createIndexRequest(Event element) {
                        Map<String, Object> map = new HashMap<>();
                        map.put("content", element.getContent());
                        map.put("trending", element.getTrending());
                        map.put("url", element.getUrl());
                        map.put("origin", element.getOrigin());
                        map.put("keywords", element.getKeywords().toArray());
                        map.put("crawlTime", element.getCrawlTime());
                        return Requests.indexRequest()
                                .index("poas_event")
                                .source(map);
                    }
                });
        // Set the username and password
        esBuilder.setRestClientFactory(restClientFactory);
        // Flush every 100 records or every 5 seconds, whichever comes first
        esBuilder.setBulkFlushMaxActions(100);
        esBuilder.setBulkFlushInterval(5000);
        // Number of retries on failure
        esBuilder.setBulkFlushBackoffRetries(3);
        // Delay between retries
        esBuilder.setBulkFlushBackoffDelay(2);
        // Retry backoff strategy
        esBuilder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.EXPONENTIAL);
        // Failure handler
        esBuilder.setFailureHandler(new RetryRejectedExecutionFailureHandler());

        eventStream.addSink(esBuilder.build());

        env.execute();
    }
}
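FlinkUtil, KafkaUtil and NLPUtil, as well as the Event and OriginalEvent beans, are project-specific classes that are not shown here. For context, below is a minimal sketch of what FlinkUtil and KafkaUtil might look like, assuming a plain execution environment and a standard FlinkKafkaConsumer; the bootstrap.servers address is a placeholder:

// Hypothetical sketch (FlinkUtil.java) -- these helpers are not shown in the original post.
package com.poas.utils;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FlinkUtil {

    public static StreamExecutionEnvironment getEnvironment() {
        // The real project may also configure checkpointing, parallelism, etc.
        return StreamExecutionEnvironment.getExecutionEnvironment();
    }
}

// Hypothetical sketch (KafkaUtil.java).
package com.poas.utils;

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class KafkaUtil {

    public static FlinkKafkaConsumer<String> getFlinkKafkaConsumer(String topic, String groupId) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder address
        props.setProperty("group.id", groupId);
        return new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), props);
    }
}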

Because my ES cluster has security authentication enabled, I need to supply the username and password when connecting. That part lives in a RestClientFactory:

package com.poas.constants;

import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.client.RestClientBuilder;

public class ESConstant {

    public static final String HOST = "xxxx";

    public static final String ES_USERNAME = "elastic";

    public static final String ES_PASSWORD = "xxxx";

    public static RestClientFactory restClientFactory = new RestClientFactory() {
        @Override
        public void configureRestClientBuilder(RestClientBuilder restClientBuilder) {
            CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            credentialsProvider.setCredentials(AuthScope.ANY,
                    new UsernamePasswordCredentials(ES_USERNAME, ES_PASSWORD));
            restClientBuilder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
                @Override
                public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpAsyncClientBuilder) {
                    httpAsyncClientBuilder.disableAuthCaching();
                    return httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                }
            });
        }
    };
}

However, after starting Flink, it reported an error: NoSuchMethodError, referencing org.apache.commons.codec.binary.Base64.

Since the error means some method is missing, my first reaction was a missing dependency, so I added the artifact that provides org.apache.commons.codec.binary.Base64:

<dependency>
  <groupId>commons-codec</groupId>
  <artifactId>commons-codec</artifactId>
  <version>1.10</version>
</dependency>

Restarted Flink, and it still failed! Searching online suggested it might be a jar conflict, but it was not obvious which artifact was the culprit (running mvn dependency:tree -Dincludes=commons-codec is one way to list every copy of commons-codec on the classpath).

After several more attempts, I read the error message more carefully and deduced that the problem lay in the security authentication, so I searched Baidu for what org.apache.commons.codec.binary.Base64 actually does and found that it Base64-encodes strings. Later, inspired by a blog post, I decided to build the Basic auth credentials myself and modified the code in RestClientFactory as follows:

package com.poas.constants;

import org.apache.flink.calcite.shaded.org.apache.commons.codec.binary.Base64;
import org.apache.flink.streaming.connectors.elasticsearch7.RestClientFactory;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.client.RestClientBuilder;

public class ESConstant {

    public static final String HOST = "xxxx";

    public static final String ES_USERNAME = "elastic";

    public static final String ES_PASSWORD = "xxxx";

    public static RestClientFactory restClientFactory = new RestClientFactory() {
        @Override
        public void configureRestClientBuilder(RestClientBuilder restClientBuilder) {
            /*CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            credentialsProvider.setCredentials(AuthScope.ANY,
                    new UsernamePasswordCredentials(ES_USERNAME, ES_PASSWORD));
            restClientBuilder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
                @Override
                public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpAsyncClientBuilder) {
                    httpAsyncClientBuilder.disableAuthCaching();
                    return httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                }
            });*/
            String user = ES_USERNAME;
            String pwd = ES_PASSWORD;
            String auth = Base64.encodeBase64String((user+":"+pwd).getBytes());
            restClientBuilder.setDefaultHeaders(new BasicHeader[]{
                    new BasicHeader("Authorization", "Basic " + auth)});
        }
    };
}
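The key change is the import: this version uses Flink's shaded copy of commons-codec (org.apache.flink.calcite.shaded.org.apache.commons.codec.binary.Base64), which sidesteps the classpath conflict. As an alternative sketch, the JDK's built-in java.util.Base64 (available since Java 8) builds the same header value without touching commons-codec at all:

import java.nio.charset.StandardCharsets;
import java.util.Base64;
import org.apache.http.message.BasicHeader;

// Inside configureRestClientBuilder(...): the same Basic auth header, built with
// the JDK's own encoder, so no commons-codec (shaded or not) is involved.
String auth = Base64.getEncoder()
        .encodeToString((ES_USERNAME + ":" + ES_PASSWORD).getBytes(StandardCharsets.UTF_8));
restClientBuilder.setDefaultHeaders(new BasicHeader[]{
        new BasicHeader("Authorization", "Basic " + auth)});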

Started the Flink program again, and this time it finally worked!
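
To double-check that documents actually arrive, the poas_event index can be queried directly. Here is a minimal sketch (my own addition, not part of the original troubleshooting) using the Elasticsearch 7.x high-level client, which flink-connector-elasticsearch7 already pulls in transitively:

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;

public class VerifyEsSink {

    public static void main(String[] args) throws Exception {
        BasicCredentialsProvider creds = new BasicCredentialsProvider();
        creds.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(ESConstant.ES_USERNAME, ESConstant.ES_PASSWORD));
        try (RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost(ESConstant.HOST, 9200))
                        .setHttpClientConfigCallback(b -> b.setDefaultCredentialsProvider(creds)))) {
            // Match-all search against the sink's target index.
            SearchResponse resp = client.search(new SearchRequest("poas_event"), RequestOptions.DEFAULT);
            System.out.println("poas_event hits: " + resp.getHits().getTotalHits().value);
        }
    }
}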

Source: blog.csdn.net/wzc3614/article/details/128766534