package com.eric.lucene; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; public class ScoreSortTest { public static void main(String[] args) throws Exception { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED); Document doc1 = new Document(); Document doc2 = new Document(); Document doc3 = new Document(); doc1.add(new Field("bookname","thinking in java", Field.Store.YES, Field.Index.ANALYZED)); doc2.add(new Field("bookname","thinking in java java java", Field.Store.YES, Field.Index.ANALYZED)); doc3.add(new Field("bookname","thinking in c++", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc1); writer.addDocument(doc2); writer.addDocument(doc3); writer.optimize(); writer.close(); IndexSearcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("bookname","java")); // query.setBoost(2); TopScoreDocCollector collector = TopScoreDocCollector.create(100, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for(int i=0; i<hits.length;i++){ Document doc = searcher.doc(hits[i].doc); System.out.println(doc.getBoost()); System.out.print(doc.get("bookname") + "\t\t"); System.out.println(hits[i].score); System.out.println(searcher.explain(query, hits[i].doc)); } } }
在没有query.setBoost(2);的情况下,结果如下:
引用
1.0
thinking in java java java 0.8660254
0.8660254 = (MATCH) fieldWeight(bookname:java in 1), product of:
1.7320508 = tf(termFreq(bookname:java)=3)
1.0 = idf(docFreq=2, maxDocs=3)
0.5 = fieldNorm(field=bookname, doc=1)
1.0
thinking in java 0.625
0.625 = (MATCH) fieldWeight(bookname:java in 0), product of:
1.0 = tf(termFreq(bookname:java)=1)
1.0 = idf(docFreq=2, maxDocs=3)
0.625 = fieldNorm(field=bookname, doc=0)
在有query.setBoost(2);的情况下,结果如下:
引用
1.0
thinking in java java java 0.8660254
0.8660254 = (MATCH) fieldWeight(bookname:java in 1), product of:
1.7320508 = tf(termFreq(bookname:java)=3)
1.0 = idf(docFreq=2, maxDocs=3)
0.5 = fieldNorm(field=bookname, doc=1)
1.0
thinking in java 0.625
0.625 = (MATCH) fieldWeight(bookname:java in 0), product of:
1.0 = tf(termFreq(bookname:java)=1)
1.0 = idf(docFreq=2, maxDocs=3)
0.625 = fieldNorm(field=bookname, doc=0)
在 Field 或 Document 上调用 setBoost 设置的 boost 值，会真正影响搜索结果的评分。（Field 级别的 boost 因为本例中各文档的 Field 相同而没有单独演示，但它与 Document 级别一样，都是在建索引时把 boost 值写入索引的 field norm 中。）代码如下：
package com.eric.lucene; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; public class ScoreSortTest { public static void main(String[] args) throws Exception { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED); Document doc1 = new Document(); Document doc2 = new Document(); Document doc3 = new Document(); doc1.add(new Field("bookname","thinking in java", Field.Store.YES, Field.Index.ANALYZED)); doc1.setBoost(4); doc2.add(new Field("bookname","thinking in java java java", Field.Store.YES, Field.Index.ANALYZED)); doc3.add(new Field("bookname","thinking in c++", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc1); writer.addDocument(doc2); writer.addDocument(doc3); writer.optimize(); writer.close(); IndexSearcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("bookname","java")); TopScoreDocCollector collector = TopScoreDocCollector.create(100, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for(int i=0; i<hits.length;i++){ Document doc = searcher.doc(hits[i].doc); System.out.println(doc.getBoost()); System.out.print(doc.get("bookname") + "\t\t"); System.out.println(hits[i].score); System.out.println(searcher.explain(query, hits[i].doc)); } } }
在没有doc1.setBoost(4);这一行的时候,结果如下:
引用
1.0
thinking in java java java 0.8660254
0.8660254 = (MATCH) fieldWeight(bookname:java in 1), product of:
1.7320508 = tf(termFreq(bookname:java)=3)
1.0 = idf(docFreq=2, maxDocs=3)
0.5 = fieldNorm(field=bookname, doc=1)
1.0
thinking in java 0.625
0.625 = (MATCH) fieldWeight(bookname:java in 0), product of:
1.0 = tf(termFreq(bookname:java)=1)
1.0 = idf(docFreq=2, maxDocs=3)
0.625 = fieldNorm(field=bookname, doc=0)
在有doc1.setBoost(4);这一行的时候,结果如下:
引用
1.0
thinking in java 2.5
2.5 = (MATCH) fieldWeight(bookname:java in 0), product of:
1.0 = tf(termFreq(bookname:java)=1)
1.0 = idf(docFreq=2, maxDocs=3)
2.5 = fieldNorm(field=bookname, doc=0)
1.0
thinking in java java java 0.8660254
0.8660254 = (MATCH) fieldWeight(bookname:java in 1), product of:
1.7320508 = tf(termFreq(bookname:java)=3)
1.0 = idf(docFreq=2, maxDocs=3)
0.5 = fieldNorm(field=bookname, doc=1)