package com.java1234.lucene;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each file becomes one document with three fields: {@code contents}
 * (tokenized, not stored), {@code fileName} and {@code fullPath} (both stored).
 * Instances hold an open {@link IndexWriter} and must be closed after use;
 * the class implements {@link AutoCloseable} so it can be used in a
 * try-with-resources statement.
 */
public class Indexer implements AutoCloseable {

    /** Index location; retained so that {@link #close()} can release it. */
    private final Directory directory;

    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index stored at the given path.
     *
     * @param indexDir file-system path of the directory holding the index
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter indexWriter;
        try {
            // Original author's note: the standard analyzer is only effective for English text.
            Analyzer analyzer = new StandardAnalyzer();
            // The writer configuration requires an analyzer.
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            indexWriter = new IndexWriter(dir, conf);
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            // Closed explicitly: this class opened the directory, so it releases it.
            directory.close();
        }
    }

    /**
     * Indexes every readable regular file located directly in {@code dataDir}.
     * Sub-directories and unreadable entries are skipped.
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the value reported by {@code writer.numDocs()} after indexing
     * @throws IllegalArgumentException if {@code dataDir} cannot be listed
     *         (for example, it does not exist or is not a directory)
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing when the path is not a listable directory.
            throw new IllegalArgumentException("Not a readable directory: " + dataDir);
        }
        for (File f : files) {
            if (f.isFile() && f.canRead()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index.
     *
     * @param f the file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件:" + f.getCanonicalPath());
        // The reader is scoped here so it is closed even when building or adding the document fails.
        // NOTE(review): FileReader decodes with the platform default charset on older JDKs — confirm
        // this matches the encoding of the data files.
        try (FileReader contents = new FileReader(f)) {
            Document doc = getDocument(f, contents);
            writer.addDocument(doc);
        }
    }

    /**
     * Builds the document for a file. A document is comparable to a database
     * record; each field takes its value from the file being indexed.
     *
     * @param f        the file being indexed
     * @param contents open reader over the file's text; owned by the caller
     * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the canonical path cannot be resolved
     */
    private Document getDocument(File f, FileReader contents) throws Exception {
        Document doc = new Document();
        // File body: supplied as a reader, so it is tokenized but not stored.
        doc.add(new TextField("contents", contents));
        // File name, stored so it can be retrieved from search hits.
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        // Full canonical path, stored so it can be retrieved from search hits.
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Indexes a data directory and prints the document count and elapsed time.
     *
     * @param args optional overrides: {@code args[0]} is the index directory,
     *             {@code args[1]} is the data directory; the built-in defaults
     *             are used for any argument that is omitted
     */
    public static void main(String[] args) {
        String indexDir = args.length > 0 ? args[0] : "E:\\lucene";       // where the index is stored
        String dataDir = args.length > 1 ? args[1] : "E:\\lucene\\data";  // files to be indexed
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources closes the indexer only when it was actually constructed.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引:" + numIndexed + "个文件 花费了" + (end - start) + "毫秒");
    }
}
package com.java1234.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of a Lucene index and
 * prints the {@code fullPath} field of each hit.
 */
public class Search {

    /**
     * Searches the index at {@code indexDir} for {@code q} and prints the
     * elapsed search time, the total hit count, and the stored
     * {@code fullPath} of up to ten top-scoring documents.
     *
     * @param indexDir file-system path of the directory holding the index
     * @param q        query text in the classic query-parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be
     *         parsed, or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // Both handles are released even when parsing or searching throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // The query text is analyzed with the same analyzer type used by the parser's target field.
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs docs = searcher.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + docs.totalHits + "个记录");

            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                System.out.println(document.get("fullPath"));
            }
        }
    }

    /**
     * Runs a sample search and prints the results.
     *
     * @param args optional overrides: {@code args[0]} is the index directory,
     *             {@code args[1]} is the query; the built-in defaults are
     *             used for any argument that is omitted
     */
    public static void main(String[] args) {
        // NOTE(review): the Indexer class defaults to an index on drive E:, this default is on D: —
        // confirm which location is intended.
        String indexDir = args.length > 0 ? args[0] : "D:\\lucene";
        String q = args.length > 1 ? args[1] : "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}