Java example: bulk-importing MySQL table data into Elasticsearch with BulkProcessor



import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.springframework.beans.factory.annotation.Autowired;


public class BulkProcessor2EsDemo {

    /** Number of index actions accumulated before a bulk request is fired. */
    public final static int BULK_COUNT = 20000;

    private static TransportClient client = ESConfig.getClient();

    /** MySQL schema name; also reused as the Elasticsearch index name. */
    public static String table = "world";

    /**
     * Streams the rows returned by {@code sql} from MySQL and indexes them
     * into Elasticsearch through a {@link BulkProcessor}.
     *
     * @param sql the SELECT statement to execute against the {@code table} schema
     * @return the number of rows read and submitted for indexing (0 on driver/SQL failure)
     * @throws InterruptedException if interrupted while waiting for the bulk
     *         processor to flush and close
     */
    public static long importData(String sql) throws InterruptedException {
        // NOTE(review): credentials are hard-coded in the JDBC URL; move them to
        // external configuration before using this outside a demo.
        String url = "jdbc:mysql://127.0.0.1:3306/" + table + "?user=root&password=1234&autoReconnect=true&useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=false";
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e1) {
            e1.printStackTrace();
            return 0L; // no driver, nothing can be imported
        }
        long count = 0; // rows read; also serves as the document id
        List<String> columnName = Arrays.asList("ID", "Name", "CountryCode", "District", "Population");
        // try-with-resources closes ResultSet/PreparedStatement/Connection in
        // reverse order even when an exception is thrown.
        try (Connection con = DriverManager.getConnection(url);
             PreparedStatement ps = con.prepareStatement(sql,
                     ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            // Integer.MIN_VALUE tells the MySQL driver to stream rows one at a
            // time instead of buffering the whole result set in memory.
            ps.setFetchSize(Integer.MIN_VALUE);
            ps.setFetchDirection(ResultSet.FETCH_REVERSE);
            try (ResultSet rs = ps.executeQuery()) {
                ResultSetMetaData rsmd = rs.getMetaData();
                int colCount = rsmd.getColumnCount();
                String index = table;
                String type = "historyLog";
                long allStart = System.currentTimeMillis(); // start of the whole import
                BulkProcessor bulkProcessor = createBulkProcessor();
                while (rs.next()) { // one iteration per row
                    count++;
                    Map<String, String> map = new LinkedHashMap<>();
                    for (int i = 1; i <= colCount; i++) {
                        String name = rsmd.getColumnName(i);
                        if (columnName.contains(name)) {
                            String value = rs.getString(i);
                            // skip null/blank values so empty columns are not indexed
                            if (value != null && !value.trim().isEmpty()) {
                                map.put(name, value);
                            }
                        }
                    }
                    bulkProcessor.add(new IndexRequest(index, type, count + "").source(map));
                }
                // Flush the remaining actions and wait for in-flight bulks.
                bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
                System.out.println("取回数据量为  " + count + " 行!");
                long end = System.currentTimeMillis();
                System.out.println("共耗时  " + (end - allStart) / 1000 + " s!");
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // close the ES client even if awaitClose was interrupted
            if (client != null) {
                client.close();
            }
        }
        return count;
    }

    /**
     * Builds a BulkProcessor that batches index requests by count, size and
     * time, logs each flush, and retries failed bulks with constant backoff.
     *
     * @return a configured, ready-to-use BulkProcessor
     */
    public static BulkProcessor createBulkProcessor() {
        BulkProcessor bulkProcessor = BulkProcessor.builder(
                client, new BulkProcessor.Listener() {
                    long begin = 0;   // timestamp taken in beforeBulk
                    long cost;        // seconds spent on the last bulk
                    int count = 0;    // total actions flushed so far
                    @Override
                    public void afterBulk(long executionId, BulkRequest bulkRequest, BulkResponse bulkResponse) {
                        cost = (System.currentTimeMillis() - begin) / 1000;
                        count += bulkRequest.numberOfActions();
                        System.out.println("bulk success. size:[{" + count + "}] cost:[{" + cost + "}s]");
                    }
                    @Override
                    public void afterBulk(long executionId, BulkRequest bulkRequest, Throwable throwable) {
                        System.out.println("bulk update has failures, will retry:" + throwable);
                    }
                    @Override
                    public void beforeBulk(long executionId, BulkRequest bulkRequest) {
                        begin = System.currentTimeMillis();
                    }
                })
                .setBulkActions(BULK_COUNT)                       // flush every N actions
                .setBulkSize(new ByteSizeValue(200, ByteSizeUnit.MB)) // or every 200 MB
                .setConcurrentRequests(10)                        // parallel in-flight bulks
                .setFlushInterval(TimeValue.timeValueSeconds(50)) // or every 50 s
                .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(1), 3)) // retry 3x, 1 s apart
                .build();
        return bulkProcessor;
    }

    public static void main(String[] args) throws InterruptedException {
        String sql = "select * from city ";
        importData(sql);
    }
}




import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.net.InetAddress;
import java.net.UnknownHostException;

@Configuration
public class ESConfig {
    /** Elasticsearch cluster name. */
    private static String clusterName = "my-application";
    /** Comma-separated list of node host names. */
    private static String host = "****";
    /** Transport (not HTTP) port. */
    private static Integer port = 9300;


    /** Settings used to construct the transport client. */
    private static Settings settings = Settings.builder().put("cluster.name", clusterName).build();
    /** Lazily-initialised singleton; volatile so the fully-built client is safely published. */
    private static volatile TransportClient client;

    /**
     * Returns the shared TransportClient, creating it on first use.
     * Uses double-checked locking: the second null check inside the
     * synchronized block prevents two racing threads from each building
     * (and one of them leaking) a client. The client is assigned to the
     * field only after its transport addresses are added, so no
     * half-configured instance is ever visible.
     *
     * @return the singleton TransportClient connected to the configured hosts
     */
    @Bean
    public static TransportClient getClient(){
        if (client == null) {
            synchronized (TransportClient.class) {
                if (client == null) { // re-check under the lock
                    TransportClient created = new PreBuiltTransportClient(settings);
                    try {
                        String[] allHost = host.split(",");
                        for (String str : allHost) {
                            created.addTransportAddresses(new TransportAddress(InetAddress.getByName(str), port));
                        }
                    } catch (UnknownHostException e) {
                        e.printStackTrace();
                    }
                    client = created; // publish only the fully-configured client
                }
            }
        }
        return client;
    }

}

Source: blog.csdn.net/sinat_36748650/article/details/89383515