Java 爬虫大众点评测试

/*
 * Created by JFormDesigner on Fri Aug 21 17:15:14 CST 2020
 */

package josupa;

import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import javax.swing.*;
import javax.swing.table.DefaultTableModel;

/**
 * Swing window that drives the Dianping scraper.
 *
 * <p>The user supplies a list-page URL prefix and a Cookie header value, presses the fetch
 * button, and the shops collected in {@code JsoupTest.list} are shown in a three-column table
 * (address, shop name, score).
 *
 * @author 1
 */
public class JsoupJframeTest extends JFrame {

    /** Frame width used for centering and as the minimum size. */
    private static final int FRAME_WIDTH = 640;
    /** Frame height used for centering and as the minimum size. */
    private static final int FRAME_HEIGHT = 535;
    /** Table column headers: address, shop, score. */
    private static final String[] COLUMN_NAMES = {"地址", "门店", "评分"};

    /**
     * Launches the window.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new JsoupJframeTest().setVisible(true);
            }
        });
    }

    /** Builds the components, then titles, sizes and centers the frame on the screen. */
    public JsoupJframeTest() {
        initComponents();

        setTitle("大众点评");
        setResizable(false);
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        // Center the frame on the screen.
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        setLocation((screenSize.width - FRAME_WIDTH) / 2, (screenSize.height - FRAME_HEIGHT) / 2);
        setMinimumSize(new Dimension(FRAME_WIDTH, FRAME_HEIGHT));
    }

    /** Creates, lays out (absolute positioning) and wires up all components. */
    private void initComponents() {
        // JFormDesigner - Component initialization - DO NOT MODIFY  //GEN-BEGIN:initComponents
        textField1 = new JTextField();
        button1 = new JButton();
        scrollPane1 = new JScrollPane();
        label1 = new JLabel();
        label2 = new JLabel();
        textField2 = new JTextField();
        textField1.setText("http://www.dianping.com/shanghai/ch10/g101p");
        // The cookie field intentionally starts empty: a session cookie is a login credential
        // and must be pasted in by the user, never committed to source.

        //======== this ========
        Container contentPane = getContentPane();
        contentPane.setLayout(null);
        contentPane.add(textField1);
        textField1.setBounds(75, 45, 430, 30);

        //---- button1 ----
        button1.setText("\u6293\u53d6");
        contentPane.add(button1);
        button1.setBounds(new Rectangle(new Point(535, 50), button1.getPreferredSize()));
        button1.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                startFetch();
            }
        });

        //======== scrollPane1 ========
        {
            // A growable model: rows are added per result and every change notifies the table.
            tableModel = new DefaultTableModel(COLUMN_NAMES, 0);
            table1 = new JTable(tableModel);
            scrollPane1.setViewportView(table1);
        }
        contentPane.add(scrollPane1);
        scrollPane1.setBounds(25, 90, 580, 395);

        //---- label1 ----
        label1.setText("\u5730\u5740\uff1a");
        contentPane.add(label1);
        label1.setBounds(new Rectangle(new Point(30, 50), label1.getPreferredSize()));

        //---- label2 ----
        label2.setText("Cookie\uff1a");
        contentPane.add(label2);
        label2.setBounds(new Rectangle(new Point(15, 15), label2.getPreferredSize()));
        contentPane.add(textField2);
        textField2.setBounds(75, 15, 530, textField2.getPreferredSize().height);

        {
            // compute preferred size
            Dimension preferredSize = new Dimension();
            for (int i = 0; i < contentPane.getComponentCount(); i++) {
                Rectangle bounds = contentPane.getComponent(i).getBounds();
                preferredSize.width = Math.max(bounds.x + bounds.width, preferredSize.width);
                preferredSize.height = Math.max(bounds.y + bounds.height, preferredSize.height);
            }
            Insets insets = contentPane.getInsets();
            preferredSize.width += insets.right;
            preferredSize.height += insets.bottom;
            contentPane.setMinimumSize(preferredSize);
            contentPane.setPreferredSize(preferredSize);
        }
        pack();
        setLocationRelativeTo(getOwner());
        // JFormDesigner - End of component initialization  //GEN-END:initComponents
    }

    /**
     * Validates the two input fields and runs the scrape on a background thread.
     *
     * <p>The fetch button is disabled while a scrape is in flight so that only one scrape
     * writes to {@code JsoupTest.list} at a time.
     */
    private void startFetch() {
        String urlText = textField1.getText();
        String cookieText = textField2.getText();
        if (urlText == null || urlText.trim().isEmpty()) {
            JOptionPane.showMessageDialog(this, "抓取地址不存在");
            return;
        }
        if (cookieText == null || cookieText.trim().isEmpty()) {
            JOptionPane.showMessageDialog(this, "cookie不存在");
            return;
        }
        final String url = urlText.trim();
        final String cookie = cookieText.trim();

        button1.setEnabled(false);
        new SwingWorker<Void, Void>() {
            @Override
            protected Void doInBackground() throws IOException {
                // Network I/O stays off the event dispatch thread so the window remains responsive.
                JsoupTest.sendHtml(url, cookie);
                return null;
            }

            @Override
            protected void done() {
                button1.setEnabled(true);
                String failure = null;
                try {
                    get(); // surfaces any exception thrown by doInBackground
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    failure = ex.toString();
                } catch (ExecutionException ex) {
                    Throwable cause = ex.getCause();
                    failure = cause == null ? ex.toString() : cause.toString();
                }
                // Best effort: show whatever was collected, even if a later page failed.
                int rows = showResults();
                if (failure != null) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "抓取失败：" + failure);
                } else if (rows == 0) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "未抓取到数据");
                }
            }
        }.execute();
    }

    /**
     * Replaces the table contents with the entries currently held in {@code JsoupTest.list}.
     * Must be called on the event dispatch thread.
     *
     * @return the number of rows now shown
     */
    private int showResults() {
        tableModel.setRowCount(0);
        List<Map> shops = JsoupTest.list;
        if (shops == null) {
            return 0;
        }
        for (Map shop : shops) {
            tableModel.addRow(new Object[] {shop.get("url"), shop.get("mer"), shop.get("score")});
        }
        return tableModel.getRowCount();
    }

    // JFormDesigner - Variables declaration - DO NOT MODIFY  //GEN-BEGIN:variables
    private JTextField textField1;
    private JButton button1;
    private JScrollPane scrollPane1;
    private JTable table1;
    private JLabel label1;
    private JLabel label2;
    private JTextField textField2;
    // JFormDesigner - End of variables declaration  //GEN-END:variables

    /** Model behind {@link #table1}; one row per scraped shop. */
    private DefaultTableModel tableModel;
}
package josupa;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Scrapes shop entries from paginated Dianping list pages with jsoup.
 *
 * <p>Each collected shop is a map with the keys {@code "url"}, {@code "mer"} (shop title) and
 * {@code "score"}, appended to the shared {@link #list}. The class keeps its state in static
 * fields and is not safe for concurrent scrapes.
 */
public class JsoupTest {

    /** Upper bound on the page number fetched in one run. */
    private static int countPage = 50;
    /** Page number the scrape starts from. */
    private static int start = 1;
    /** Pause between two consecutive page requests, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 1000L;
    /** Shops collected so far; read by callers after {@link #sendHtml(String, String)}. */
    public static List<Map> list = new ArrayList<>();

    /**
     * Scrapes the list pages reachable from {@code url} and stores the shops in {@link #list}.
     * Results of a previous call are discarded first so repeated calls do not duplicate entries.
     *
     * @param url    list-page URL prefix; the page number is appended to it
     * @param cookie value sent as the {@code Cookie} request header
     * @throws IOException if a page cannot be fetched
     */
    public static void sendHtml(String url, String cookie) throws IOException {
        list.clear();
        run(url, start, countPage, cookie);
    }

    /**
     * Command-line entry point: scrapes the default Shanghai listing without a cookie and prints
     * every collected entry.
     *
     * @param args ignored
     * @throws IOException if a page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.dianping.com/shanghai/ch10/g101p";
        String cookie = "";
        run(url, start, countPage, cookie);
        for (Map m : list) {
            System.out.println(m.toString());
        }
    }

    /**
     * Fetches pages {@code start}, {@code start + 1}, ... one at a time until either
     * {@code countPage} or the last page advertised by the pagination links is reached, or the
     * pagination cannot be read.
     *
     * @param url       list-page URL prefix; the page number is appended to it
     * @param start     first page number to fetch
     * @param countPage highest page number that may be fetched
     * @param cookie    value sent as the {@code Cookie} request header
     * @throws IOException if a page cannot be fetched
     */
    private static void run(String url, int start, int countPage, String cookie) throws IOException {
        int page = start;
        while (true) {
            Document document = Jsoup.connect(url + page)
                    .header("Cookie", cookie)
                    .get();
            collectShops(document);

            // NOTE(review): assumes the last "PageLink" element carries the highest linked page
            // number - confirm against the live markup.
            Elements pageLinks = document.getElementsByClass("PageLink");
            if (pageLinks.isEmpty()) {
                // No pagination at all (single page, or a block/verification page): stop here.
                System.out.println("第" + page + "页");
                return;
            }
            String lastPageText = pageLinks.get(pageLinks.size() - 1).text();
            System.out.println("第" + page + "页," + lastPageText);

            int lastPage;
            try {
                lastPage = Integer.parseInt(lastPageText.trim());
            } catch (NumberFormatException e) {
                // Pagination text is not a number; the page count is unknown, so stop.
                return;
            }
            if (page >= countPage || page >= lastPage) {
                return;
            }

            try {
                // Throttle requests between pages.
                Thread.sleep(PAGE_DELAY_MILLIS);
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of continuing without the delay.
                Thread.currentThread().interrupt();
                return;
            }
            page++;
        }
    }

    /**
     * Extracts every shop entry found under the {@code shop-all-list} element of
     * {@code document} and appends it to {@link #list}.
     *
     * @param document parsed list page
     */
    private static void collectShops(Document document) {
        Element shopList = document.getElementById("shop-all-list");
        if (shopList == null) {
            System.err.println("shop-all-list element not found; no shops collected from this page");
            return;
        }
        for (Element tag : shopList.getElementsByClass("txt")) {
            Map<String, String> map = new HashMap<>();
            for (Element a : tag.getElementsByAttributeValue("data-hippo-type", "shop")) {
                System.out.println(" 门店:" + a.attr("title") + ", 【地址:" + a.attr("href") + "】");
                map.put("url", a.attr("href"));
                map.put("mer", a.attr("title"));
            }
            for (Element com : tag.getElementsByClass("comment")) {
                String score = com.getElementsByClass("star_score").text();
                System.out.println(score);
                map.put("score", score);
            }
            list.add(map);
        }
    }

}

跑几次就被封了……

猜你喜欢

转载自blog.csdn.net/jintaocccq/article/details/108198745
今日推荐