Java 获取网页主信息之一:HTML 树操作[转]

1.节点操作 

package Source;    
   
   
/**
 * A node of the HTML tree: the text collected for the node, its tag name and
 * a link to its parent node.
 *
 * <p>The fields are package-private on purpose: {@code HTree} in this package
 * reads {@code content}, {@code hasContent}, {@code isLeaf}, {@code isBlock}
 * and {@code parent} directly.
 */
public class Node
{
    /**
     * Creates an empty leaf node: no content, no tag ({@code null}) and no
     * parent.
     */
    public Node()
    {
        this("", null, null);
    }

    /**
     * Creates a leaf node with the given content, tag and parent.
     *
     * @param content initial text of the node; {@code null} is treated as the
     *                empty string instead of failing with a
     *                {@code NullPointerException}
     * @param tag     tag name of the node, stored as given
     * @param parent  parent node, or {@code null} when the node has none
     */
    public Node(String content, String tag, Node parent)
    {
        this.content = (content == null) ? "" : content;
        this.tag = tag;
        this.hasContent = !this.content.isEmpty();
        this.parent = parent;
        this.isLeaf = true;
    }

    /**
     * Appends text to this node's content and refreshes {@code hasContent}.
     *
     * @param str text to append; {@code null} is ignored so that the literal
     *            text "null" never ends up in the content
     */
    public void addContent(String str)
    {
        // The field is package-visible, so it can have been set to null from
        // outside this class; normalise before concatenating.
        if (content == null)
        {
            content = "";
        }
        if (str != null)
        {
            content = content + str;
        }
        hasContent = !content.isEmpty();
    }

    /**
     * Sets the leaf flag.
     *
     * @param is new value of {@code isLeaf}
     */
    public void setLeaf(boolean is)
    {
        isLeaf = is;
    }

    /**
     * Sets the block flag.
     *
     * @param is new value of {@code isBlock}
     */
    public void setBlock(boolean is)
    {
        isBlock = is;
    }

    /**
     * Returns the node's content.
     *
     * @return the current content string
     */
    @Override
    public String toString()
    {
        return content;
    }

    /** Text held by this node; the constructors never leave it null. */
    String content;
    /** Tag name; stays {@code null} when the no-argument constructor is used. */
    String tag;
    /** True when {@code content} was non-empty the last time this class updated it. */
    boolean hasContent;
    /** Leaf flag; true after construction, changed through {@link #setLeaf(boolean)}. */
    boolean isLeaf;
    /** Block flag; false after construction, changed through {@link #setBlock(boolean)}. */
    boolean isBlock;
    /** Parent node, or {@code null} when there is none. */
    Node parent;
}

2.树操作

package Source;    
   
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
   
   
/**
 * Flat container for the nodes of an HTML tree.
 *
 * <p>Nodes are kept in insertion order, and every operation of this class
 * visits them from the most recently inserted node back to the first one.
 * The tree shape itself lives in the {@code parent} links of the nodes.
 */
public class HTree
{
    /** Creates an empty tree. */
    public HTree()
    {
        list = new LinkedList<Node>();
    }

    /**
     * Appends a node to the tree.
     *
     * @param node node to add; it is stored as given
     */
    public void insert(Node node)
    {
        list.add(node);
    }

    /**
     * Prints the trimmed content of every node whose trimmed content is not
     * empty, one per line on standard output, last inserted node first.
     */
    public void print()
    {
        for (String text : collectNonBlankContent())
        {
            System.out.println(text);
        }
    }

    /**
     * Prints the untrimmed content of every node whose {@code isBlock} flag
     * is set, one per line on standard output, last inserted node first.
     */
    public void print2()
    {
        for (Iterator<Node> it = list.descendingIterator(); it.hasNext();)
        {
            Node node = it.next();
            if (node.isBlock)
            {
                System.out.println(node.content);
            }
        }
    }

    /**
     * Merges leaf nodes into their ancestors.
     *
     * <p>For each leaf, visited from the last inserted node to the first, the
     * parent chain is walked upwards. Every ancestor without content on the
     * way is marked as a non-leaf. The first ancestor that has content
     * receives the leaf's content, is marked as a leaf again (so it is merged
     * further up when its own turn comes) and the original node is marked as
     * not being a block. A leaf with no such ancestor is marked as a block.
     */
    public void merge()
    {
        for (Iterator<Node> it = list.descendingIterator(); it.hasNext();)
        {
            Node node = it.next();
            if (!node.isLeaf)
            {
                continue;
            }

            // Climb to the nearest ancestor that already holds content.
            Node target = node.parent;
            while (target != null && !target.hasContent)
            {
                target.setLeaf(false);
                target = target.parent;
            }

            if (target == null)
            {
                // Ran off the top of the tree: the leaf stands on its own.
                node.setBlock(true);
            }
            else
            {
                target.addContent(node.content);
                target.setLeaf(true);
                node.setBlock(false);
            }
        }
    }

    /**
     * Returns the trimmed content of every node whose trimmed content is not
     * empty, last inserted node first.
     *
     * <p>NOTE(review): despite the method name, the {@code isBlock} flag is
     * not consulted here; the result is the same set of strings that
     * {@link #print()} writes. Confirm with callers before changing this.
     *
     * @return a new array of the non-blank, trimmed contents; empty when
     *         there are none
     */
    public String[] getBlock()
    {
        List<String> texts = collectNonBlankContent();
        return texts.toArray(new String[texts.size()]);
    }

    /** Gathers the trimmed, non-empty contents in a single reverse pass over the nodes. */
    private List<String> collectNonBlankContent()
    {
        List<String> texts = new ArrayList<String>();
        // descendingIterator() walks the linked list once; indexing it with
        // get(i) would rescan the list on every step.
        for (Iterator<Node> it = list.descendingIterator(); it.hasNext();)
        {
            String text = it.next().content.trim();
            if (!text.isEmpty())
            {
                texts.add(text);
            }
        }
        return texts;
    }

    /** Nodes in insertion order. */
    private final LinkedList<Node> list;
}
 

猜你喜欢

转载自liuxinglanyue.iteye.com/blog/833482