import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queries.mlt.MoreLikeThis; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import dev.lzq.search.lucene4x.commons.Manager; public class TestMoreLikeThisQuery { public static void main(String[] args) throws IOException { IndexReader reader = Manager.getIndexReader(); IndexSearcher searcher = Manager.getIndexSearcher(); int refDocId = 1; Document refDoc = searcher.doc(refDocId); System.out.println("关联文档: 【" + refDoc.get("Title") + "】" + refDoc.get("Introduction")); MoreLikeThis mlt = new MoreLikeThis(Manager.getIndexReader()); mlt.setFieldNames(new String[]{"Search_Field_Title", "Introduction"}); // 默认值是2,建议自己做限制,否则可能查不出结果 mlt.setMinTermFreq(1); // 默认值是5,建议自己做限制,否则可能查不出结果 mlt.setMinDocFreq(1); mlt.setAnalyzer(Manager.getAnalyzer()); // query实际质上是BooleanQuery。 // MoreLikeThis的原理是根据你提供的参考Document 和 Fields, 利用Analyzer给该Document的Fileds做分词, 然后拼接BooleanQuery做Occur.SHOULD搜索 Query query = mlt.like(refDocId); TopDocs tds = searcher.search(query, 10); ScoreDoc[] sds = tds.scoreDocs; for(ScoreDoc sd : sds) { Document doc = searcher.doc(sd.doc); System.out.println("相似文档: 【" + doc.get("Title") + "】 " + doc.get("Introduction")); } } }
其中，Manager 类的代码没有贴出来：它返回的 IndexReader、IndexSearcher、Analyzer 都只是普通的新建对象，没有做什么特别的封装。