springmvc实现敏感词过滤(2)
springmvc实现敏感词过滤的思路:首先在数据库中建立一张敏感词表,用于记录敏感词;springmvc接收到用户发布话语的请求后,读取数据库中的敏感词库,与用户的话语逐一比对;如果话语中存在敏感词,则用特殊符号将其替换,最后输出替换后的语句。
如果你是把敏感词库写在文件中,可以参考:springmvc实现敏感词过滤(1)。
实现效果:
数据表:
代码:
处理用户发布的请求:
@RequestMapping("uploadfiles") public String uploadfiles(ContentByImg contentByImg,Model model) throws Exception { System.out.println("---content:0"+contentByImg.toString()); // 上传图片,返回图片名称集合 List<String> list_image = fileUtils.getImgPath(contentByImg.getFiles()); String []pic=new String[3]; int i=0; for (String s:list_image){ System.out.println("filename:"+s); pic[i++]=s; } Content content1=new Content(); content1.setPic1(pic[0]); content1.setPic2(pic[1]); content1.setPic3(pic[2]); content1.setUserid(contentByImg.getUserid()); //敏感词过滤,获得敏感词库 List<Sensitive>list=sensitiveService.selectAll(); Set<String>set=new HashSet<>(list.size()); for(Sensitive sensitive:list){ set.add(sensitive.getWord()); } //初始化敏感词库 HashMap map=sensiteWord.initSensitivateWord(set); String word=sensiteWord.replaceSensitiveWord(contentByImg.getContent(),"*"); System.out.println("content:"+word); //把过滤后的内容插入内容表 content1.setContent(word); contentService.insertContent(content1); //记录用户说的敏感话语 Set<String>set1=sensiteWord.getSensitivateWord(contentByImg.getContent()); System.out.println("content:"+set1.size()+"---"+set1); if(set1.size()>0){ Sensrecord sensrecord=new Sensrecord(); sensrecord.setUserid(contentByImg.getUserid()); sensrecord.setSize(set1.size()); StringBuffer stringBuffer=new StringBuffer(); for(String s:set1){ stringBuffer.append(s+","); } String words=stringBuffer.toString(); sensrecord.setWord(words); sensRecordService.insertSensRecord(sensrecord); } Page page=new Page(); List<Content>listCotent=contentService.selectPageList(page); int totals=contentService.selectPageCount(page); page.setTotalRecord(totals); model.addAttribute("listContent",listCotent); model.addAttribute("page",page); return "index"; }
对敏感词过滤的操作:
/** * 敏感词过滤 * 1.读取敏感词库 * 2.初始化敏感词库,构建dfa算法模型 * Created by ASUS on 2018/5/24 * * @Authod Grey Wolf */ @Component public class SensiteWord { /** * 输出set的内容 * @param set */ public void sysSet(Set<String> set){ for (String word:set){ System.out.println("word:"+word); } } /** * 输出map的内容 * @param map */ public void sysMap(HashMap map){ Iterator entries = map.entrySet().iterator(); while (entries.hasNext()) { Map.Entry entry = (Map.Entry) entries.next(); Object key = entry.getKey(); Object value = entry.getValue(); System.out.println("Key = " + key + ", Value = " + value); } } /** * 返回敏感词库列表模型 * @param set * @return */ public HashMap initSensitivateWord(Set<String>set){ HashMap map=new HashMap(set.size()); String key=null; Map nowMap=null; HashMap<String,String> newMap=null; //迭代敏感词库 Iterator<String> iterator=set.iterator(); while (iterator.hasNext()){ key=iterator.next(); nowMap=map; int i; for (i=0;i<key.length();i++){ //转换成char型 char keyChar=key.charAt(i); //获取 Object wordMap=nowMap.get(keyChar); //如果存在,直接赋值 if(wordMap!=null){ nowMap= (Map) wordMap; }else {//不存在,则构建一个map.将isEnd 设置为0,最后一个为1 newMap=new HashMap<String,String>(); newMap.put("isEnd","0"); nowMap.put(keyChar,newMap); nowMap=newMap; } //最后一个 if (i==key.length()-1){ nowMap.put("isEnd","1"); } } } return map; } /** * 替换敏感字符串 * @param txt * @param replaceChar * @return */ public String replaceSensitiveWord(String txt,String replaceChar) throws Exception { String resultTxt=txt; //获取内容中所有的敏感词 Set<String>set=getSensitivateWord(txt); System.out.println("获取所有敏感词:"); sysSet(set); Iterator<String> iterator=set.iterator(); String word=null; String replaceTxt=null; while (iterator.hasNext()){ word=iterator.next(); //获取要替换的字符串 replaceTxt=getReplaceTxt(replaceChar,word.length()); resultTxt=resultTxt.replaceAll(word,replaceTxt); } return resultTxt; } /** * 获取内容的敏感词 * @param txt * @return */ public Set<String> getSensitivateWord(String txt) throws Exception { Set<String>set=new HashSet<>(); int i; for 
(i=0;i<txt.length();i++){ //判断是否包含敏感字符,返回敏感个数 int length=checkSensitiveWord(txt,i); if(length>0){ set.add(txt.substring(i,i+length)); //因为for会自增 i=i+length-1; } } return set; } /** * 检查文字中是否包含敏感字符 * @param txt * @param begin * @return 如果存在,则返回敏感词字符的长度,不存在返回0 */ private int checkSensitiveWord(String txt, int begin) throws Exception { //敏感词结束标识符 boolean flag=false; //匹配标识数默认为0 int matchFlag=0; char word=0; Map nowMap=initSensitivateWord(readSensitivateWord()); int i; for (i=begin;i<txt.length();i++){ word=txt.charAt(i); //获取指定key nowMap=(Map)nowMap.get(word); //存在,则判断是否为最后一个 if(nowMap!=null){ //找到相应的key,匹配标识为1 matchFlag++; if("1".equals(nowMap.get("isEnd"))){ flag=true; } }else{//不存在,直接返回 break; } } //长度必须大于等于1,为词 if (matchFlag < 2|| !flag) { matchFlag=0; } return matchFlag; } /** * 返回要替换敏感字符串 * @param replaceChar * @param length * @return */ private String getReplaceTxt(String replaceChar, int length) { int i; String word=replaceChar; for (i=1;i<length;i++){ word+=replaceChar; } return word; } }
我的座右铭:不会,我可以学;落后,我可以追赶;跌倒,我可以站起来;我一定行。