Lucene的CRUD使用
说明:由于Lucene把我们的文档(Document)分词后以二进制格式写入索引文件,无法直接阅读,所以这里借助索引查看工具Luke的jar包(lukeall-4.10.0.jar)来查看分词后的内容
打开run.bat,里面其实只有一条命令,类似于在Linux系统下启动Redis时执行的命令:
java -jar lukeall-4.10.0.jar
公共部分:导入jar包
<dependencies> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queries</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-test-framework</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>4.10.2</version> </dependency> <dependency> <groupId>com.janeluo</groupId> <artifactId>ikanalyzer</artifactId> <version>2012_u6</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency> </dependencies>
将IK分词器的配置文件拷贝到resources目录下。IK的配置文件提供了强大的扩展功能:把不希望被拆开的内容配置进去之后,分词时就会把它当成一个完整的词语
--------------------------------------------增加部分-----------------------------------------------------------------------------------------------
1 添加单个document文档在lucene中
//添加一个documt,相当于添加文章或者文章标题,用ik分析器进行分词存储在一个地方 @Test public void luceneone() throws Exception { //获取检索对象 Directory directory= FSDirectory.open(new File("f:\\test")); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST,new IKAnalyzer()); IndexWriter indexWriter= new IndexWriter(directory,indexWriterConfig); //获取文档 Document document = new Document(); //将文档加入lucene中 document.add(new TextField("content","黑马程序员蓝瘦香菇隔壁二大爷", Field.Store.YES)); //text类型 document.add(new IntField("id",1, Field.Store.YES)); //int类型 document.add(new StringField("title","黑马咋了", Field.Store.YES)); //string类型 indexWriter.addDocument(document); indexWriter.commit(); indexWriter.close(); }
执行结果:
2 添加多个document文档在lucene中
//添加多个document @Test public void lucenetwo() throws IOException { //创建写入对象 Directory directory=FSDirectory.open(new File("f:\\test")); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST,new IKAnalyzer()); IndexWriter indexWriter = new IndexWriter(directory,indexWriterConfig); //写入多个文档 List<Document> list =new ArrayList<Document>(); //添加五个document for(int i=1;i<=5;i++){ //往一个document中添加faild,内容 Document document = new Document(); document.add(new IntField("pid",i, Field.Store.YES)); document.add(new StringField("pname","格力空调", Field.Store.YES)); document.add(new TextField("content","我是世界上最好的冰箱,好空调格力造!!!"+i+"号", Field.Store.YES)); list.add(document);//将一个document放入list中 } indexWriter.addDocuments(list); //执行 indexWriter.commit(); //关闭资源 indexWriter.close(); }
通过以上两个步骤,文档document已经添加到lucene中并完成了分词,结果在上图中有展示。下面我们将模拟用户通过索引查询分词库中的内容
-------------------------------------查询板块-------------------------------------------------------------------------------------------------
3 单个字段查询 QueryParser
//单字段查询 //QueryParse只用用于查询分词后的对象,不能查询不分词的,StringFiled查询不了. @Test public void indexSercher() throws Exception { //创建查询对象 IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 //要想得到query必须使用查询 QueryParser queryParser=new QueryParser("pname",new IKAnalyzer()); Query query = queryParser.parse("格力空调"); TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE); int totalHits = topDocs.totalHits;//返回得分文档的总条数 System.out.println(totalHits); ScoreDoc[] scoreDocs = topDocs.scoreDocs; //返回得分文档的数组 for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score; //根据得分文档查询一条信息的分数 int id = scoreDoc.doc; //根据得分文档查询自身id(这个id是lucene自己编写的从0开始) Document doc = indexSearcher.doc(id);//查询对象根据id查询到自身的文档doc //获取这个id对应文档doc的,根据doc获取内容 //System.out.println(doc); String content = doc.get("pname");//获取一个document中的具体信息 System.out.println("查询的总条数:"+totalHits+"----分数:"+score+"-----文档信息content:"+content); } }
结果
4 多个字段查询 MultiFieldQueryParser
//多字段查询,本例中是使用title和pname两个字段查询,只要有一个字段里面有"格力空调",那就返回结果. //多字段查询解析器 MultitFieldQueryParser @Test public void multiFieldQueryParseToLUcene() throws Exception { //查询对象 IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("F:\\test")))); //执行查询 MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(new String[]{"title","pname"},new IKAnalyzer()); Query query = multiFieldQueryParser.parse("格力空调"); TopDocs topDocs = indexSearcher.search(query, 20); //通过topDocs获取总条数,和总id int totalHits = topDocs.totalHits;//获取总条数 ScoreDoc[] scoreDocs = topDocs.scoreDocs;//总分数的id for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//每条的分数 int id = scoreDoc.doc;//每条的id(lucene自己编的) Document doc = indexSearcher.doc(id);//根据id获取具体文档 System.out.println(doc); } }
结果:
5 词条查询 TermQuery
//词条查询 //词条查询可以查询到分词和不分词的 TermQuery @Test public void TermQueryToLucene() throws IOException { //获取查询对象IndexSERCHER IndexSearcher indexSearcher=new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 TermQuery termQuery=new TermQuery(new Term("title","黑马咋了")); TopDocs topDocs = indexSearcher.search(termQuery, Integer.MAX_VALUE); int totalHits = topDocs.totalHits;//总条数 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//每条查询结果的得分 int id = scoreDoc.doc;//每条查询结果的id(lucene自己编码) Document doc = indexSearcher.doc(id); System.out.println(doc); String title = doc.get("title"); System.out.println("共查询到"+totalHits+"条数据.-----"+"title="+title); } }
结果
6 通配符查询 WildcardQuery
//通配符查询 WildcardQuery //*代表0-多个字符 ?代表1个占位符 @Test public void wilCardQueryToLuene() throws IOException { //获取查询对象 IndexSearcher indexSearcher=new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 WildcardQuery wildcardQuery = new WildcardQuery(new Term("pname", "格力*")); TopDocs topDocs = indexSearcher.search(wildcardQuery, Integer.MAX_VALUE); int totalHits = topDocs.totalHits;//查询到的总条数 ScoreDoc[] scoreDocs = topDocs.scoreDocs;//查询到的分数数组 for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//具体一条查询结果的分数 int id = scoreDoc.doc;//具体一条查询结果的id Document doc = indexSearcher.doc(id);//根据id获取具体的document System.out.println(doc); String title = doc.get("title");//通过document获取title System.out.println("共查询到"+totalHits+"条信息.---结果是:"+title); } }
结果:因为查询到的document中没有title字段,所以打印出来的结果是null;如果将java代码中的doc.get("title")换成doc.get("content"),就会有内容
7 模糊查询 FuzzyQuery
//模糊查询:FuzzyQuery 一个中文占两个字符,默认模糊匹配2个字符 @Test public void fuzzyQueryToLusene() throws IOException { //获取查询对象 IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("pname", "格力空")); TopDocs topDocs = indexSearcher.search(fuzzyQuery, Integer.MAX_VALUE); int totalHits = topDocs.totalHits; //获取总条数 ScoreDoc[] scoreDocs = topDocs.scoreDocs; //获取查询结果的分数数组 for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//获取一条查询结果的分数 int id = scoreDoc.doc;//获取一条查询结果的id(lusene自己排,从0开始) //通过id获取doc Document doc = indexSearcher.doc(id); //System.out.println(doc); //获取doc中的内容 String content = doc.get("content"); System.out.println("共查询到"+totalHits+"条数据,---结果:"+content); } }
查询结果:
8 数字查询 NumericRangeQuery
//数字范围查询 NumericRangeQuery @Test public void numQueryToLusene() throws IOException { //获取查询对象 IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 NumericRangeQuery<Integer> numerice = NumericRangeQuery.newIntRange("pid", 0, 1, true, true);//true表示包括,false表示不包括,这里的 意思是包括0,也包括1 TopDocs topDocs = indexSearcher.search(numerice, 20); int totalHits = topDocs.totalHits;//查询到的总数据条数 ScoreDoc[] scoreDocs = topDocs.scoreDocs;//查询到的分数数组 for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//查询到的分数 int id = scoreDoc.doc;//查询到结果的id Document doc = indexSearcher.doc(id);//根据id获取文档 System.out.println(doc); } }
结果:
9 组合查询 BooleanQuery
//组合查询 BooleanClauses ,组合是各种的查询的交集 @Test public void booleanQueryToLusene() throws IOException { //获取查询对象 IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("f:\\test")))); //执行查询 BooleanQuery booleanClauses = new BooleanQuery(); //组合模糊查询,条件为必须 FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("pname", "格力空")); //再组合一个数字范围的查询结果 NumericRangeQuery<Integer> intRange = NumericRangeQuery.newIntRange("pid", 0, 2, true, false); //在组合查询中加入模糊查询和数字范围查询 booleanClauses.add(fuzzyQuery, BooleanClause.Occur.MUST);//must表示必须有的 booleanClauses.add(intRange, BooleanClause.Occur.MUST_NOT);//表示为必须没有 TopDocs topDocs = indexSearcher.search(booleanClauses, 20); int totalHits = topDocs.totalHits;//获取查询总数 ScoreDoc[] scoreDocs = topDocs.scoreDocs;//获取查询分数的数组 for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score; //获取一条查询结果的分数 int id = scoreDoc.doc;//获取一条查询结果的id; //根据id获取具体文档 Document doc = indexSearcher.doc(id); System.out.println(doc); } } 结果