最近写了一个敏感词过滤的程序,并且将其部署到服务器端。
程序需要实现的功能很简单:客户端传输一段数据,服务器端接收到数据后进行关键词过滤,再将过滤后的数据返回给客户端。
因此思路就比较清晰了:首先使用socket建立连接;连接建立后从socket中取出输入流,解析并提取出其中的数据;接着对数据进行关键词过滤,并将过滤后的数据重新封装;最后把封装好的数据写入socket的输出流发回给客户端,然后断开连接。
Socket连接
第一步是实现socket连接。实现的思路是使用ServerSocket类,通过serverSocket.accept()获取Socket对象,然后读取socket的InetAddress(可以据此进行IP过滤,下面的代码中只是将其打印出来),之后将该socket封装成任务提交到线程池中,具体代码如下:
import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.ServerSocket; import java.net.Socket; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class Server { private int queueSize = 20; private int port = 50001; private ServerSocket serverSocket; public Server(){ try{ serverSocket = new ServerSocket(); //关闭serverSocket时,立即释放serverSocket绑定端口以便端口重用,默认为false serverSocket.setReuseAddress(true); //accept等待连接超时时间为1000毫秒,默认为0,永不超时 // serverSocket.setSoTimeout(10000); //为所有accept方法返回的socket对象设置接收缓存区大小,单位为字节,默认值和操作系统有关 serverSocket.setReceiveBufferSize(128*1024); //设置性能参数,可设置任意整数,数值越大,相应的参数重要性越高(连接时间,延迟,带宽) serverSocket.setPerformancePreferences(3, 2, 1); //服务端绑定至端口,queueSize为服务端连接请求队列长度 serverSocket.bind(new InetSocketAddress(port), queueSize); }catch(Exception e){ e.printStackTrace(); } } public void service(SensitivewordFilter filter,ExecutorService pool){ while(true){ Socket socket = null; try{ //从连接请求队列中取出一个客户连接请求,创建与客户连接的socket对象 //如果队列中没有请求,accept方法就会一直等待 //设置了超时时间之后会抛出异常 socket = serverSocket.accept(); InetAddress address=socket.getInetAddress(); System.out.println(address.getHostAddress()); Callable<Void> task=new ThreadTask(socket,filter); pool.submit(task); }catch(Exception e){ e.printStackTrace(); } } } public static void main(String[] args) { ExecutorService pool= Executors.newFixedThreadPool(50);//使用线程池防止DDRS使服务器停止工作 final SensitivewordFilter filter = new SensitivewordFilter(); Server server = new Server(); server.service(filter,pool); } }
上述代码中有一个关键的类ThreadTask,该类实现(implements)了Callable接口,其功能是在线程池的线程中处理由ServerSocket接受到的socket,具体的处理逻辑写在重写的call方法中。
import java.io.*; import java.net.Socket; import java.util.concurrent.Callable; public class ThreadTask implements Callable<Void> { private Socket socket; private SensitivewordFilter filter; public ThreadTask(Socket socket, SensitivewordFilter filter) { this.socket = socket; this.filter = filter; } @Override public Void call() throws Exception { InputStream is = null; InputStreamReader isr = null; BufferedReader br = null; OutputStream os = null; OutputStreamWriter outputStreamWriter = null; try { //获取输入流,并读取客户端信息 is = socket.getInputStream(); isr = new InputStreamReader(is, "UTF-8"); br = new BufferedReader(isr); String info = null; String requests=""; while(true){ info=br.readLine(); if(info==null){ break; } requests+=info.trim(); } //info=br.readLine(); socket.shutdownInput();//关闭输入流 System.out.println(requests); info=JsonFilter.FromStringToJson(filter.replaceSensitiveWord(JsonFilter.FromJsonToString(requests),1,"*")); //获取输出流,响应客户端的请求 outputStreamWriter = new OutputStreamWriter(socket.getOutputStream(), "UTF-8"); BufferedWriter bufferedWriter=new BufferedWriter(outputStreamWriter); bufferedWriter.write(info+"\r\n"); bufferedWriter.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { //关闭资源 try { if (outputStreamWriter != null) outputStreamWriter.close(); if (os != null) os.close(); if (br != null) br.close(); if (isr != null) isr.close(); if (is != null) is.close(); if (socket != null) socket.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } public static String getEncoding(String str) { String encode = "GB2312"; try { if (str.equals(new String(str.getBytes(encode), encode))) { //判断是不是GB2312 String s = encode; return s; //是的话,返回“GB2312“,以下代码同理 } } catch (Exception exception) { } encode = "ISO-8859-1"; try { if (str.equals(new String(str.getBytes(encode), encode))) { //判断是不是ISO-8859-1 String s1 = encode; return s1; } } catch (Exception exception1) { } encode = "UTF-8"; try { if 
(str.equals(new String(str.getBytes(encode), encode))) { //判断是不是UTF-8 String s2 = encode; return s2; } } catch (Exception exception2) { } encode = "GBK"; try { if (str.equals(new String(str.getBytes(encode), encode))) { //判断是不是GBK String s3 = encode; return s3; } } catch (Exception exception3) { } return "null"; } }
数据处理
在获取到客户端发送的数据之后,需要从JSON格式的数据中取出需要处理的内容,为此单独写了一个JsonFilter类。该类含有两个静态方法:一个用于从JSON中取出需要进行关键词过滤的数据,另一个用于将过滤后的数据重新封装到JSON之中。
import org.json.JSONObject;
import java.util.Date;

/**
 * Converts between the JSON messages exchanged with clients and the plain
 * text that gets filtered.
 */
public class JsonFilter {

    /**
     * Extracts the string stored under the {@code "data"} key of a JSON object.
     *
     * @param json JSON text of the request
     * @return the {@code "data"} value, or an empty string if any exception is
     *         thrown while parsing or reading it (the stack trace is printed)
     */
    public static String FromJsonToString(String json) {
        String payload = "";
        try {
            payload = new JSONObject(json).getString("data");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return payload;
    }

    /**
     * Wraps text into the JSON response message.
     *
     * <p>The message carries the text under {@code "data"}, the current time
     * (as produced by {@link Date#toString()}) under {@code "time"} and the
     * constant 100 under {@code "number"}.
     *
     * @param json text to place under {@code "data"} (despite its name it is
     *             treated as an ordinary string value)
     * @return the serialized JSON object, or an empty string if any exception
     *         is thrown while building it (the stack trace is printed)
     */
    public static String FromStringToJson(String json) {
        String message = "";
        try {
            JSONObject envelope = new JSONObject();
            envelope.put("data", json);
            envelope.put("time", (new Date()).toString());
            envelope.put("number", 100);
            message = envelope.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return message;
    }
}
关键词过滤
关键词过滤采用的是DFA方法
DFA方法的基本思路是根据关键词构造一棵树。例如需要过滤的词汇为“深度学习”和“深坑”,则根据这两个关键词构造出的树结构为
{深-{度-{学-{习}},坑}}
所以具体构造代码如下
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Loads the sensitive-word list and builds the nested-map tree used by the
 * DFA matching algorithm.
 *
 * <p>Each tree node is a map. A {@link Character} key leads to the child node
 * for that character; the {@code "isEnd"} key holds {@code "1"} when a word
 * ends at the node and {@code "0"} otherwise. The root map has only character
 * keys.
 */
public class SensitiveWordInit {
    /** Character encoding of the word-list file. */
    private static final String ENCODING = "GBK";

    /** Root of the word tree; stays {@code null} until a successful {@link #initKeyWord()}. */
    public HashMap sensitiveWordMap;

    public SensitiveWordInit() {
        super();
    }

    /**
     * Reads the word list and builds the tree.
     *
     * @return the root map, or whatever {@link #sensitiveWordMap} held before
     *         (initially {@code null}) when loading fails; failures are
     *         printed, not thrown
     */
    public Map initKeyWord() {
        try {
            Set<String> keyWordSet = readSensitiveWordFile();
            addSensitiveWordToHashMap(keyWordSet);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return sensitiveWordMap;
    }

    /**
     * Builds the tree from {@code keyWordSet} and stores its root in
     * {@link #sensitiveWordMap}.
     *
     * @param keyWordSet words to insert; empty strings add nothing
     */
    // Nodes mix Character keys with the String key "isEnd", so children come
    // back as Object and have to be cast; they are only ever maps created here.
    @SuppressWarnings("unchecked")
    private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Pre-size the root by the word count to reduce rehashing.
        HashMap<Object, Object> root = new HashMap<Object, Object>(keyWordSet.size());
        sensitiveWordMap = root;

        for (String word : keyWordSet) {
            Map<Object, Object> node = root;
            for (int i = 0; i < word.length(); i++) {
                char keyChar = word.charAt(i);
                Object child = node.get(keyChar);
                if (child == null) {
                    // New branch: assume it is not a word end until proven otherwise.
                    Map<Object, Object> created = new HashMap<Object, Object>();
                    created.put("isEnd", "0");
                    node.put(keyChar, created);
                    node = created;
                } else {
                    node = (Map<Object, Object>) child;
                }
            }
            // The node reached by the last character terminates a word. An
            // empty word never leaves the root, which must stay unmarked.
            if (word.length() > 0) {
                node.put("isEnd", "1");
            }
        }
    }

    /**
     * Reads {@code ./SensitiveWord.txt} (one word per line, GBK-encoded) into
     * a set. The path is the test location and has to be replaced for a real
     * deployment.
     *
     * @return the lines of the file, unmodified
     * @throws FileNotFoundException if the path is not an existing regular file
     * @throws IOException           if reading fails
     */
    private Set<String> readSensitiveWordFile() throws IOException {
        File file = new File("./SensitiveWord.txt");
        // Check before opening: opening a missing file throws on its own, which
        // would make this explicit check unreachable.
        if (!file.isFile()) {
            throw new FileNotFoundException("敏感词库文件不存在");
        }

        Set<String> words = new HashSet<String>();
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line);
            }
        } finally {
            // Closing the underlying stream releases the file even if the
            // reader could not be constructed.
            in.close();
        }
        return words;
    }
}