Lucene学习笔记(一)

Lucene学习笔记(一)

建立索引:

<!--<br><br>Code highlighting produced by Actipro CodeHighlighter (freeware)<br>http://www.CodeHighlighter.com/<br><br>-->importjava.io.File;
importjava.io.FileReader;
importjava.io.IOException;
importjava.util.Date;

importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexWriter;

publicclassIndexer
{
//建立索引
/**
*@paramargs
*/
publicstaticvoidmain(String[]args)throwsException
{
//TODOAuto-generatedmethodstub
if(args.length!=2)
{
thrownewException("Usage:java"+Indexer.class.getName()+"<indexdir><datadir>");
}
FileindexDir
=newFile(args[0]);//存放索引的路径
FiledataDir=newFile(args[1]);//要建立索引的目录路径
longstart=newDate().getTime();
intnumIndexed=index(indexDir,dataDir);//建立索引
longend=newDate().getTime();
System.
out.println("Indexing"+numIndexed+"filestook"+(end-start)+"milliseconds");
}

privatestaticintindex(FileindexDir,FiledataDir)throwsIOException
{
if(!dataDir.exists()||!dataDir.isDirectory())
{
thrownewIOException(dataDir+"doesnotexistorisnotadirectory");
}
IndexWriterwriter
=newIndexWriter(indexDir,newStandardAnalyzer(),true);
writer.setUseCompoundFile(
false);
indexDirectory(writer,dataDir);
intnumIndexed=writer.docCount();
writer.optimize();
writer.close();
returnnumIndexed;
}

privatestaticvoidindexDirectory(IndexWriterwriter,FiledataDir)throwsIOException
{
//TODOAuto-generatedmethodstub
File[]files=dataDir.listFiles();
for(inti=0;i<files.length;i++)
{
Filef
=files[i];
if(f.isDirectory())
{
indexDirectory(writer,f);
}
elseif(f.getName().endsWith(".txt"))
{
indexFile(writer,f);
}
}
}

privatestaticvoidindexFile(IndexWriterwriter,Filef)throwsIOException
{
if(f.isHidden()||!f.exists()||!f.canRead())
{
return;
}
System.
out.println("Indexing"+f.getCanonicalPath());
Documentdoc
=newDocument();
doc.add(
newField("contents",newFileReader(f)));
doc.add(
newField("filename",f.getCanonicalPath(),Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);

}

}

搜索:
<!--<br><br>Code highlighting produced by Actipro CodeHighlighter (freeware)<br>http://www.CodeHighlighter.com/<br><br>-->importjava.io.File;
importjava.util.Date;

importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.queryParser.QueryParser;
importorg.apache.lucene.search.Hits;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;


publicclassSearcher
{

/**
*@paramargs
*/
publicstaticvoidmain(String[]args)throwsException
{
if(args.length!=2)
{
thrownewException("Usage:java"+Searcher.class.getName()+"<indexdir><auery>");
}
FileindexDir
=newFile(args[0]);//要搜索的索引所在目录
Stringq=args[1];//搜索关键字
if(!indexDir.exists()||!indexDir.isDirectory())
{
thrownewException(indexDir+"doesnotexistorisnotadirectory.");
}
search(indexDir,q);
}

privatestaticvoidsearch(FileindexDir,Stringq)throwsException
{
DirectoryfsDir
=FSDirectory.getDirectory(indexDir,false);
IndexSearcher
is=newIndexSearcher(fsDir);//打开索引
QueryParserparser=newQueryParser("contents",newStandardAnalyzer());

Queryquery
=parser.parse(q);//对文本内容进行分析查询
longstart=newDate().getTime();
Hitshits
=is.search(query);//搜索索引
longend=newDate().getTime();
System.err.println(
"Found"+hits.length()+"document(s)(in"+(end-start)+"milliseconds)thatmatchedquery'"+q+"’:");
for(inti=0;i<hits.length();i++)
{
Documentdoc
=hits.doc(i);//得到匹配的文档
System.out.println(doc.get("filename"));
}
}

}

重点学习的几个方面:

1. Lucene的索引结构;

2. 各种异构数据源的解析器(Parser);

3. 分词器(Analyzer)提取索引项(尤其是如何进行中文分词)。

注:这里使用的是 Lucene 2.2.0。