中文分词Java简单实现

版权声明:本文为博主原创文章,转载请注明出处。 https://blog.csdn.net/Su_Mo/article/details/78486700

中文分词Java简单实现
存备忘

package helloJavaWorld;

/**
 * A minimal growable LIFO stack backed by an {@code Object[]}.
 * Used by the segmenter to hold matched words.
 */
class StackTest {
    /** Backing storage; slots at index {@code >= size} hold {@code null}. */
    private Object[] stack;
    /** Number of elements currently stored. */
    private int size;

    /** Creates a stack with the default initial capacity of 10. */
    public StackTest() {
        this(10);
    }

    /**
     * Creates a stack with a caller-chosen initial capacity.
     *
     * @param len initial length of the backing array; 0 is allowed
     */
    public StackTest(int len) {
        stack = new Object[len];
    }

    /**
     * @return the number of elements currently on the stack
     */
    public int size() {
        return size;
    }

    /**
     * @return the length of the backing array, i.e. the current capacity
     */
    public int capacity() {
        return stack.length;
    }

    /**
     * Grows the backing array (to {@code size * 3 / 2 + 1}) when it is completely full,
     * so that at least one more element fits.
     */
    public void ensureCapacity() {
        if (size >= stack.length) {
            grow(size * 3 / 2 + 1);
        }
    }

    /**
     * Pushes an element onto the top of the stack, growing the backing array if needed.
     *
     * @param o the element to push; may be {@code null}
     */
    public void push(Object o) {
        int newSize = size + 1;
        // Grow as soon as the new size would reach the capacity. Using >= (instead of ==)
        // also covers a zero-length backing array, which previously never grew and made
        // the first push fail with ArrayIndexOutOfBoundsException.
        if (newSize >= stack.length) {
            grow(newSize * 3 / 2 + 1);
        }
        stack[size] = o;
        // Commit the new size only after the store succeeded, so a failed allocation
        // leaves the stack unchanged.
        size = newSize;
    }

    /**
     * @return {@code true} when the stack holds no elements
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Removes and returns the element on top of the stack.
     *
     * @return the most recently pushed element
     * @throws ArrayIndexOutOfBoundsException if the stack is empty
     */
    public Object pop() {
        if (isEmpty()) {
            throw new ArrayIndexOutOfBoundsException("不能为空");
        }
        Object o = stack[--size];
        // Clear the vacated slot so the stack does not keep the object reachable.
        stack[size] = null;
        return o;
    }

    /** Replaces the backing array with a larger one, preserving the stored elements. */
    private void grow(int newCapacity) {
        Object[] newStack = new Object[newCapacity];
        System.arraycopy(stack, 0, newStack, 0, size);
        stack = newStack;
    }
}

    /**
     * Dictionary-based Chinese word segmenter using backward (right-to-left)
     * maximum matching: starting from the end of the text, the longest dictionary
     * word that ends at the current position is taken; when no dictionary word ends
     * there, the last character is skipped.
     *
     * @author zhangliang
     */
    class Split {
        /** The built-in dictionary of known words. */
        private final String[] dictionary = {"我","是","武汉","理工大","理工大学","武汉理工大学","的","一名","二","年级","二年级","学生"};
        /** The text to segment; may be {@code null}, which is treated as empty. */
        private final String input;

        /**
         * @param input the text to segment
         */
        public Split(String input) {
            this.input = input;
        }

        /**
         * Segments the input and prints the words to standard output in their
         * original left-to-right order, each followed by two spaces.
         */
        public void start() {
            for (String word : segment()) {
                System.out.print(word + "  ");
            }
        }

        /**
         * Segments the input by backward maximum matching.
         *
         * @return the recognized words in left-to-right order; characters that are
         *         not covered by any dictionary word are omitted; empty for a
         *         {@code null} or empty input
         */
        public java.util.List<String> segment() {
            java.util.ArrayDeque<String> words = new java.util.ArrayDeque<>();
            if (input == null) {
                return new java.util.ArrayList<>(words);
            }
            // 'end' is the exclusive right edge of the still-unsegmented prefix.
            int end = input.length();
            while (end > 0) {
                // Try the longest candidate first: begin = 0 gives the whole prefix,
                // larger 'begin' gives progressively shorter suffixes of it.
                int begin = 0;
                while (begin < end && !isInDictionary(input.substring(begin, end))) {
                    begin++;
                }
                if (begin < end) {
                    // Cut off exactly the matched suffix. (Removing the word with
                    // String.replace would also delete earlier occurrences of it.)
                    words.addFirst(input.substring(begin, end));
                    end = begin;
                } else {
                    // No dictionary word ends here: skip this character.
                    end--;
                }
            }
            return new java.util.ArrayList<>(words);
        }

        /**
         * Checks whether a candidate word is in the dictionary.
         *
         * @param temp the candidate word
         * @return {@code true} if {@code temp} equals a dictionary entry;
         *         {@code false} otherwise, including for {@code null}
         */
        public boolean isInDictionary(String temp) {
            for (String entry : dictionary) {
                if (entry.equals(temp)) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * Demo entry point: segments one fixed sample sentence and prints the result.
     */
    public class splitChinsesCharacter {
        /**
         * Runs the segmenter on the sample sentence.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            // The sentence to segment.
            final String sentence = "我是武汉理工大学一名二年级的学生";
            Split splitter = new Split(sentence);
            splitter.start();
        }
    }

猜你喜欢

转载自blog.csdn.net/Su_Mo/article/details/78486700