Lucene + 盘古分词的地址匹配

1 环境配置：添加引用，并将 pangu.xml 与字典文件放在指定位置

2 初始化    

创建盘古分词器

/// <summary>
/// Supplies the PanGu analyzer that is handed to the Lucene index writer and
/// query parser. A new analyzer instance is created on every access.
/// </summary>
/// <remarks>
/// The property is named <c>PanGuAnalyzer</c> so that it matches the name the
/// indexing and matching code uses when it references the analyzer.
/// </remarks>
protected Analyzer PanGuAnalyzer
{
    get { return new PanGuAnalyzer(); }
}

// Initialize the PanGu segmenter (step 2 of this walkthrough).
// NOTE(review): presumably this loads pangu.xml and the dictionary files
// mentioned in step 1 from their configured location — confirm.
PanGu.Segment.Init();

3 创建索引

/// <summary>
/// Rebuilds the Lucene address index at <c>E:\DIZHIindex</c> from the feature
/// class selected in <c>comboBox3</c>. Each feature contributes one document
/// holding its "Address", "X" and "Y" attribute values.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// The selected feature class lacks one of the fields Address, X or Y.
/// </exception>
public void CRindex()
{
    object selectedLayer = this.comboBox3.SelectedItem;
    if (selectedLayer == null)
    {
        // Nothing selected: avoid a NullReferenceException and leave the existing index untouched.
        MessageBox.Show("请先选择要建立索引的图层");
        return;
    }

    // Open and validate the source BEFORE creating the writer: the writer is
    // opened with create = true, which discards any existing index.
    IFeatureClass icladd = AEDataHelper.OpenMdbFeatureClass(fileName4, selectedLayer.ToString());

    // Field positions are the same for every feature, so resolve them once.
    int addressIndex = icladd.FindField("Address");
    int xIndex = icladd.FindField("X");
    int yIndex = icladd.FindField("Y");
    if (addressIndex < 0 || xIndex < 0 || yIndex < 0)
    {
        throw new System.InvalidOperationException(
            "The source feature class must contain the fields Address, X and Y.");
    }

    IndexWriter writer = new IndexWriter("E:\\DIZHIindex", PanGuAnalyzer, true);
    IFeatureCursor pCursor = null;
    try
    {
        IQueryFilter queryFilter = new QueryFilterClass();
        queryFilter.WhereClause = "";

        // The features are only read here, so a search cursor is used instead of an update cursor.
        pCursor = icladd.Search(queryFilter, false);

        IFeature pfea = pCursor.NextFeature();
        while (pfea != null)
        {
            string address = pfea.get_Value(addressIndex).ToString();
            string xzb = pfea.get_Value(xIndex).ToString();
            string yzb = pfea.get_Value(yIndex).ToString();

            Document doc = new Document();
            // The address is tokenized by the analyzer and then indexed so it can be searched.
            doc.Add(new Lucene.Net.Documents.Field("地址", address, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
            // Coordinates are only read back from matched documents, never searched,
            // so they are stored without being indexed.
            doc.Add(new Lucene.Net.Documents.Field("X坐标", xzb, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO));
            doc.Add(new Lucene.Net.Documents.Field("Y坐标", yzb, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO));
            writer.AddDocument(doc);

            pfea = pCursor.NextFeature();
        }

        writer.Optimize();
    }
    finally
    {
        // Always release the index lock and the COM cursor, even when indexing fails part-way.
        writer.Close();
        if (pCursor != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(pCursor);
        }
    }

    MessageBox.Show("创建完成");
}

4 匹配
  IndexSearcher isd = new IndexSearcher("E:\\DIZHIindex");

            QueryParser par = new QueryParser("地址", PanGuAnalyzer);

             IFeatureClass icladd = AEDataHelper.OpenMdbFeatureClass(fileName5, this.comboBox4.SelectedItem.ToString());

            IQueryFilter queryFilter = new QueryFilterClass();
            queryFilter.WhereClause = "";
            IFeatureCursor pCursor = icladd.Update(queryFilter, false);
            IFeature pfea = pCursor.NextFeature();
            while (pfea != null)
            {
                int index = pfea.Fields.FindField("Address");
                string address = pfea.get_Value(index).ToString();
                string fcre = GetKeyWordsSplitBySpace(address);
               
                Query qq = par.Parse(fcre);
                TopDocs hits = isd.Search(qq, 10);
                if (hits.totalHits>0)
                {
                    Document odcdd = isd.Doc(hits.scoreDocs[0].doc);

                int index2 = pfea.Fields.FindField("PPDZ");
                pfea.set_Value(index2, odcdd.Get("地址"));
                int index3 = pfea.Fields.FindField("X");
                pfea.set_Value(index3, odcdd.Get("X坐标"));
                int index4 = pfea.Fields.FindField("Y");
                pfea.set_Value(index4, odcdd.Get("Y坐标"));

                pfea.Store();
          
                }
                
                pfea = pCursor.NextFeature();
            }
             MessageBox.Show("匹配完成");

总结：本文使用 Lucene + 盘古分词对地名地址进行匹配，匹配成功率为 80%。若需要源代码，请联系 QQ 1148460370 或微信号 chicharito2914，并注明添加原因。


猜你喜欢

转载自blog.csdn.net/u010723516/article/details/53759612
今日推荐