Setting up a Python Elasticsearch environment

Elasticsearch can be installed on both Windows and Linux:

Windows: download the ZIP archive
Linux: download the tar archive
Download page: https://www.elastic.co/downloads/elasticsearch

After extracting, run: bin/elasticsearch (or bin\elasticsearch.bat on Windows)
Check for success: visit http://localhost:9200

On Linux, Elasticsearch cannot be run as the root user.
Running it as an ordinary user without the right permissions fails with:
java.nio.file.AccessDeniedException

Reason: the current user does not have permission on the installation directory.
Solution: chown -R <username> <elasticsearch installation directory>
Example: chown -R elasticsearch /data/wwwroot/elasticsearch-6.2.4
PS: other Java software reporting an AccessDeniedException can be fixed the same way — grant the running user permission on the relevant directory.

Code examples

The following code implements a residential-community search feature similar to the one on Lianjia's site.
It reads community names and addresses from data files, writes them into Elasticsearch, and then matches communities by the code of the city they are in plus a search keyword.
The code contains three main parts:
1. Create an index
2. Bulk-load the data into Elasticsearch with the bulk helper
3. Search the data
Note:
this code targets the older Elasticsearch 2.x; higher versions differ in how index mapping types are created.

#coding:utf8
from __future__ import print_function
from __future__ import unicode_literals

import os
import time
from datetime import datetime

import config
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

class ElasticSearch():
    """Thin wrapper around an Elasticsearch client for a community-search index.

    Provides index creation, bulk loading, single-document get/delete helpers,
    and a city-filtered phrase-prefix search over community name/address.
    """

    def __init__(self, index_name, index_type, ip="127.0.0.1"):
        '''
        :param index_name: name of the index to operate on
        :param index_type: mapping type inside the index
        :param ip: host of the Elasticsearch node (port 9200 assumed)
        '''
        self.index_name = index_name
        self.index_type = index_type
        # Without authentication:
        # self.es = Elasticsearch([ip])
        # With basic authentication (credentials are placeholders — replace
        # them with real ones, ideally loaded from configuration):
        self.es = Elasticsearch([ip], http_auth=('elastic', 'password'), port=9200)

    def create_index(self, index_name="ftech360", index_type="community"):
        '''
        (Re)create the index with a mapping for city_code / name / address.

        Any existing index with the same name is deleted first, so this is
        destructive.

        NOTE(review): the index_name/index_type parameters are accepted but
        ignored — the instance attributes set in __init__ are used instead.
        Kept as-is for backward compatibility with existing callers.
        '''
        # "string" is the Elasticsearch 2.x field type; ES 5+ replaced it
        # with "text"/"keyword" (see the note above the code example).
        _index_mappings = {
            "mappings": {
                self.index_type: {
                    "properties": {
                        "city_code": {
                            "type": "string",
                            # "index": "not_analyzed"
                        },
                        "name": {
                            "type": "string",
                            # "index": "not_analyzed"
                        },
                        "address": {
                            "type": "string",
                            # "index": "not_analyzed"
                        }
                    }
                }
            }
        }
        # Drop any stale index so the mapping is always fresh.
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        res = self.es.indices.create(index=self.index_name, body=_index_mappings)
        print(res)

    def build_data_dict(self):
        '''
        Read community data from the two tab-separated source files.

        :return: (name_dict, address_dict) where
                 name_dict maps community_code -> (name, city_code) and
                 address_dict maps community_code -> address.
        '''
        # Column layout (0-based) assumed from usage — confirm against the
        # data files: 0 = city code, 6 = community code, 7 = name,
        # 10 = address (detail file).
        name_dict = {}
        with open(os.path.join(config.datamining_dir, 'data_output', 'house_community.dat')) as f:
            for line in f:
                fields = line.decode('utf-8').split('\t')
                name_dict[fields[6]] = (fields[7], fields[0])

        address_dict = {}
        with open(os.path.join(config.datamining_dir, 'data_output', 'house_community_detail.dat')) as f:
            for line in f:
                fields = line.decode('utf-8').split('\t')
                address_dict[fields[6]] = fields[10]

        return name_dict, address_dict

    def bulk_index_data(self, name_dict, address_dict):
        '''
        Bulk-insert the community data into Elasticsearch.

        :param name_dict: community_code -> (name, city_code)
        :param address_dict: community_code -> address (codes may be missing)
        '''
        actions = []
        for community_code, (name, city_code) in name_dict.items():
            actions.append({
                "_index": self.index_name,
                "_type": self.index_type,
                "_id": community_code,  # community code doubles as document id
                "_source": {
                    "city_code": city_code,
                    "name": name,
                    # Fall back to '' when no detail record exists.
                    "address": address_dict.get(community_code, ''),
                },
            })
        # Bulk insert; per-document self.es.index(...) calls are much slower.
        success, _ = bulk(self.es, actions, index=self.index_name, raise_on_error=True)
        print('Performed %d actions' % success)

    def delete_index_data(self, id):
        '''
        Delete a single document from the index.

        :param id: document id (community code)
        '''
        res = self.es.delete(index=self.index_name, doc_type=self.index_type, id=id)
        print(res)

    def get_data_id(self, id):
        '''Fetch a single document by id and print its main fields.'''
        res = self.es.get(index=self.index_name, doc_type=self.index_type, id=id)
        print(res['_source']['city_code'], res['_id'], res['_source']['name'], res['_source']['address'])

    def get_data_by_body(self, name, city_code):
        '''
        Search communities by keyword within one city.

        :param name: search keyword, matched as a phrase prefix against
                     name (boosted x3) and address
        :param city_code: exact, non-scoring city filter
        :return: list of raw hit dicts from Elasticsearch
        '''
        # doc = {'query': {'match_all': {}}}
        doc = {
            "query": {
                "bool": {
                    # term filter: exact match, does not affect scoring
                    "filter": {
                        "term": {
                            "city_code": city_code
                        }
                    },
                    # scored prefix match on name/address; name weighted 3x
                    "must": {
                        "multi_match": {
                            "query": name,
                            "type": "phrase_prefix",
                            "fields": ['name^3', 'address'],
                            "slop": 1,
                        }
                    }
                }
            }
        }
        _searched = self.es.search(index=self.index_name, doc_type=self.index_type, body=doc)
        return _searched['hits']['hits']
         

if __name__=='__main__':
    # Build the index and bulk-load the community data into Elasticsearch.
    writer = ElasticSearch("ftech360", "community")
    writer.create_index()
    name_dict, address_dict = writer.build_data_dict()
    writer.bulk_index_data(name_dict, address_dict)

    # Query the freshly built index. The original discarded the return
    # value, so the example demonstrated nothing — print the hits instead.
    reader = ElasticSearch("ftech360", "community")
    hits = reader.get_data_by_body(u'保利', '510100')
    for hit in hits:
        print(hit['_source'])

You may also like

Origin www.cnblogs.com/i-love-python/p/11443978.html