Python script: Create one-to-one corresponding field mappings in Elasticsearch for the columns of a PostgreSQL table

To optimize a program that automatically matches the columns of a PostgreSQL table and creates the corresponding field mappings in Elasticsearch, follow the steps below:

  1. Connect to a PostgreSQL database and get metadata information for a table, including column names and data types.
import psycopg2

# Connect to the PostgreSQL database and read the table's column metadata
# (column name and data type) from information_schema.
# NOTE(review): replace the placeholder connection values with real ones;
# `your_pg_port` must be an int literal, not a bare name, or this raises
# NameError at runtime.
conn = psycopg2.connect(
    host="your_pg_host",
    port=your_pg_port,
    database="your_pg_database",
    user="your_pg_username",
    password="your_pg_password",
)
try:
    # `with` closes the cursor even if the query fails; the parameterized
    # query keeps the table name out of the SQL string.
    with conn.cursor() as cur:
        cur.execute(
            "SELECT column_name, data_type FROM information_schema.columns "
            "WHERE table_name = %s",
            ("your_table",),
        )
        # List of (column_name, data_type) tuples, consumed below.
        columns = cur.fetchall()
finally:
    # Always release the connection, even on error.
    conn.close()
  2. Create an Elasticsearch index and dynamically generate field mappings based on the table's column information.
from elasticsearch import Elasticsearch

# Connect to the Elasticsearch cluster.
es = Elasticsearch(['your_es_host:your_es_port'])

# Name of the index to create; the empty `properties` dict is filled in
# below with one entry per PostgreSQL column.
index_name = 'your_index_name'
index_mapping = {'mappings': {'properties': {}}}

# 根据PostgreSQL的数据类型获取对应的Elasticsearch数据类型
# Lookup table from PostgreSQL data types to Elasticsearch field types.
# Extends the original if/elif chain with smallint, real,
# double precision and date, which previously fell through to 'keyword'.
_PG_TO_ES_TYPES = {
    'smallint': 'integer',
    'integer': 'integer',
    'bigint': 'long',
    'numeric': 'float',
    'real': 'float',
    'double precision': 'double',
    'character varying': 'text',
    'text': 'text',
    'boolean': 'boolean',
    'date': 'date',
    'timestamp with time zone': 'date',
    'timestamp without time zone': 'date',
    'bytea': 'binary',
}


def get_es_type(data_type):
    """Return the Elasticsearch field type for a PostgreSQL *data_type*.

    Unknown types default to 'keyword', a safe catch-all that stores the
    value as an exact, non-analyzed string.
    """
    return _PG_TO_ES_TYPES.get(data_type, 'keyword')

# Add one field mapping per table column, translating the PostgreSQL
# type to its Elasticsearch equivalent.
for col_name, pg_type in columns:
    index_mapping['mappings']['properties'][col_name] = {
        'type': get_es_type(pg_type),
    }

# Create the index with the generated mappings.
es.indices.create(index=index_name, body=index_mapping)
  3. Import the data of the PostgreSQL table into the Elasticsearch index.
import base64

# Re-open a PostgreSQL connection and fetch all rows of the table.
conn = psycopg2.connect(
    host="your_pg_host",
    port=your_pg_port,
    database="your_pg_database",
    user="your_pg_username",
    password="your_pg_password",
)
try:
    with conn.cursor() as cur:
        cur.execute("SELECT * FROM your_table")
        data = cur.fetchall()
finally:
    # Always release the connection, even on error.
    conn.close()

# Index each row as one document.
# NOTE(review): the metadata query above has no ORDER BY ordinal_position,
# so the column order of `columns` is assumed to match `SELECT *` — confirm.
for record in data:
    document = {}
    for (column_name, data_type), value in zip(columns, record):
        if data_type == 'bytea' and value is not None:
            # Elasticsearch 'binary' fields expect a base64-encoded string;
            # raw bytes are not JSON-serializable, so the original
            # `bytes(...)` assignment would fail at es.index() time.
            document[column_name] = base64.b64encode(bytes(value)).decode('ascii')
        else:
            document[column_name] = value
    es.index(index=index_name, body=document)

Through the above steps, the program automatically matches the PostgreSQL table's columns, creates the corresponding field mappings in Elasticsearch, and imports the table's data into the index. Adjust the connection information and other parameters in the code to suit your environment and needs.

Guess you like

Origin blog.csdn.net/a772304419/article/details/132348705