一、跨度域查询
基类是:SpanQuery
子类:SpanTermQuery(本文测试用的是这个子类,当然还有其他子类)
二、测试
public class SpanTest { Directory dir; IndexWriter writer; IndexReader reader; IndexSearcher search; String[] ceshi; //初始化把索引存在内存中做测试 public void init() throws IOException{ dir=new RAMDirectory(); writer=writer(dir); ceshi=new String[]{"i like you","are you ok"}; } public IndexWriter writer(Directory dir) throws IOException{ Analyzer analyzer=new WhitespaceAnalyzer(Version.LUCENE_42); IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_42,analyzer); return new IndexWriter(dir, config); } public void createWrite() throws IOException{ for(int i=0;i<ceshi.length;i++){ Document doc=new Document(); doc.add(new TextField("contents",ceshi[i],Store.YES)); writer.addDocument(doc); } writer.close(); } public void test() throws IOException{ reader=DirectoryReader.open(dir); search=new IndexSearcher(reader); SpanTermQuery query=new SpanTermQuery(new Term("contents","you")); Map<Term,TermContext>m=new HashMap<Term,TermContext>(); TermContext termContext=TermContext.build(search.getTopReaderContext(),query.getTerm(),false); m.put(query.getTerm(), termContext); Bits bits = new Bits.MatchAllBits(0); Spans spans=query.getSpans(reader.getContext().leaves().get(0),bits, m); int num=0; System.out.println(query.getTerm()); while(spans.next()){ num++; int id=spans.doc(); Document d=reader.document(id); Analyzer analyzer=new WhitespaceAnalyzer(Version.LUCENE_42); TokenStream ts= analyzer.tokenStream("contents", new StringReader(d.get("contents"))); CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class); ts.reset(); //此行,不能少,不然会报 java.lang.ArrayIndexOutOfBoundsException StringBuffer buffer=new StringBuffer(""); buffer.append(""); int k=0; while(ts.incrementToken()){ if(k==spans.start()){ buffer.append("<"); } buffer.append(termAttribute.toString()); if(k+1==spans.end()){ buffer.append(">"); } buffer.append(" "); k++; } System.out.println(spans.start()+" "+spans.end()); System.out.println(buffer); //if(num==3)break; //break; } if(num==0){ 
System.out.println("no spans"); } } public static void main(String[] args) throws IOException { SpanTest s=new SpanTest(); s.init(); s.createWrite(); s.test(); } }
运行结果:
最后:
int k=0; while(ts.incrementToken()){ if(k==spans.start()){ buffer.append("<"); } buffer.append(termAttribute.toString()); if(k+1==spans.end()){ buffer.append(">"); } buffer.append(" "); k++; } 但是换用其他分词器时,上面这种匹配写法就会出错:其他分词器在分词时可能会根据需求去掉一些词(例如 StopWords 停用词等),而这里的 k 只是从 0 开始对输出的词元逐个计数,所以它并不是单词在原文中的真实位置,与 spans.start()/spans.end() 对不上。正确的做法是通过 PositionIncrementAttribute 累加每个词元的位置增量来得到真实位置。