java爬虫杭州房价 用jsoup

初学者学爬虫可以玩玩我做的这个小demo,原理就是用了jsoup这个小玩意,数据源是房天下的数据,杭州的房价,总共爬了100页。

两个类,超简单的,一个是爬虫demo,另一个是简单的开发商类,里面存放了这个开发商有多少套房源,均价多少,总价多少,方便后面对所有开发商的均价做排行。

/**
 * Created by Precious_Life on 2018/8/14.
 */

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.TreeMap;

/**
 * Small jsoup crawler demo: downloads listing pages 1..100 below
 * {@code http://esf.hz.fang.com/house-a0154/}, prints every listing it can
 * parse, aggregates listing count / summed total price / mean unit price per
 * "kaifashang" name (the {@code title} attribute of the third link of a
 * listing), and finally prints the names ordered by ascending mean unit price.
 *
 * <p>The page structure (class names {@code shop_list}, {@code add_shop},
 * {@code tel_shop}, {@code red}) is taken as-is from the selectors used below;
 * listings that do not match that structure are skipped instead of aborting
 * the crawl.
 */
public class JsoupBuildingTest {
    /** Number of listings that were parsed successfully so far. */
    public static int count = 0;

    /** Aggregated figures per name, kept sorted by name by the {@link TreeMap}. */
    public static Map<String,kaifashangInfo> treemap=new TreeMap<String,kaifashangInfo>();

    /**
     * Exact sum of all unit prices recorded per name through
     * {@link #addkaifashang}. Needed because the mean stored in
     * {@link kaifashangInfo} is an int and cannot be updated exactly on its own.
     * Only accessed while holding the class lock.
     */
    private static final Map<String, Long> junjiaSums = new HashMap<String, Long>();

    /** Common prefix of the listing URLs; the page number is appended to it. */
    private static final String LIST_URL_PREFIX = "http://esf.hz.fang.com/house-a0154/i3";
    private static final int FIRST_PAGE = 1;
    private static final int LAST_PAGE = 100;
    /** Per-request timeout; the former 999999999 ms allowed a hang of about 11.5 days. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Increments {@link #count} under the class lock. */
    public static synchronized void countIncrease() {
        count++;
    }

    /**
     * Records one listing for the given name.
     *
     * <p>The stored mean unit price is the arithmetic mean of all unit prices
     * recorded for that name (integer division of the exact sum by the listing
     * count). The summed total price saturates at {@link Integer#MAX_VALUE}
     * instead of wrapping around.
     *
     * @param name    key under which the listing is aggregated
     * @param zongjia total price of the listing, in yuan
     * @param junjia  unit price of the listing, in yuan per square metre
     */
    public static synchronized void addkaifashang(String name,int zongjia,int junjia){
        kaifashangInfo previous = treemap.get(name);
        if (previous == null) {
            junjiaSums.put(name, Long.valueOf(junjia));
            treemap.put(name, new kaifashangInfo(name, 1, zongjia, junjia));
            return;
        }

        Long knownSum = junjiaSums.get(name);
        // An entry put into the public map by other code has no recorded sum;
        // reconstruct the best available approximation from mean * count.
        long priorSum = knownSum != null
                ? knownSum.longValue()
                : (long) previous.getJunjia() * previous.getHousecount();
        long newSum = priorSum + junjia;
        int newCount = previous.getHousecount() + 1;
        long newTotal = Math.min((long) previous.getZongjia() + zongjia, Integer.MAX_VALUE);

        junjiaSums.put(name, Long.valueOf(newSum));
        treemap.put(name, new kaifashangInfo(name, newCount, (int) newTotal, (int) (newSum / newCount)));
    }

    /**
     * Starts the crawl followed by the ranking report on a separate thread.
     *
     * @param args ignored
     * @throws IOException never thrown by the current implementation; kept in
     *                     the signature for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        Runnable runnable = new Runnable() {
            @Override
            public void run() {
                crawlAllPages();
                printRanking();
            }
        };
        new Thread(runnable).start();
    }

    /** Crawls every page; a page that cannot be fetched is reported and skipped. */
    private static void crawlAllPages() {
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            try {
                crawlPage(page);
            } catch (IOException e) {
                // Keep going: one failed request must not discard the data already collected.
                System.err.println("Failed to fetch page " + page + ": " + e);
            }
        }
    }

    /** Downloads one listing page and processes every {@code dl} inside its first {@code shop_list}. */
    private static void crawlPage(int page) throws IOException {
        Document document = Jsoup.connect(LIST_URL_PREFIX + page)
                .userAgent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; MALC)")
                .timeout(TIMEOUT_MILLIS)
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3")
                .header("Connection", "keep-alive")
                .header("Host", "esf.hz.fang.com")
                // Do not reject the response because of its content type.
                .ignoreContentType(true)
                .get();

        Elements listContainers = document.getElementsByClass("shop_list");
        if (listContainers.isEmpty()) {
            System.err.println("Page " + page + " contains no shop_list element; skipped");
            return;
        }
        for (Element listing : listContainers.get(0).select("dl")) {
            handleListing(listing);
        }
    }

    /** Parses one {@code dl} element, records it and prints it; silently skips anything that does not parse. */
    private static void handleListing(Element listing) {
        Elements parts = listing.children();
        if (parts.size() < 3) {
            return;
        }
        Element infoPart = parts.get(1);
        Element pricePart = parts.get(2);

        Elements links = infoPart.select("a");
        Elements priceSpans = pricePart.select("span");
        // The name is read from the third link and the unit price from the second span.
        if (links.size() < 3 || priceSpans.size() < 2) {
            return;
        }

        String kaifashang = links.get(2).attr("title");
        String address = infoPart.getElementsByClass("add_shop").select("span").text();
        String leixing = infoPart.getElementsByClass("tel_shop").text();
        Integer zongjia_final = parseAmount(pricePart.getElementsByClass("red").text(), "万", 10000);
        Integer junjia_final = parseAmount(priceSpans.get(1).text(), "元", 1);
        if (zongjia_final == null || junjia_final == null) {
            return;
        }

        // Count only listings that were actually recorded, so the final total matches the ranking.
        countIncrease();
        addkaifashang(kaifashang, zongjia_final.intValue(), junjia_final.intValue());
        System.out.println("Num"+count+"  开发商" + kaifashang + " 地址:" + address + " 户型:" + leixing + " 单价:" + junjia_final + "元/每平 总价:" + zongjia_final+"元");
    }

    /**
     * Parses the number in front of {@code unit} and scales it.
     *
     * <p>The multiplication happens before the conversion to int, so fractional
     * amounts such as "185.5万" are kept (1855000) rather than truncated.
     *
     * @return the rounded, scaled amount, or {@code null} when {@code text} has
     *         no number in front of {@code unit} or the result does not fit an int
     */
    private static Integer parseAmount(String text, String unit, int multiplier) {
        int unitIndex = text.indexOf(unit);
        if (unitIndex <= 0) {
            return null;
        }
        try {
            long scaled = Math.round(Double.parseDouble(text.substring(0, unitIndex)) * multiplier);
            if (scaled < 0 || scaled > Integer.MAX_VALUE) {
                return null;
            }
            return Integer.valueOf((int) scaled);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Prints the overall total and the per-name ranking by ascending mean unit price. */
    private static void printRanking() {
        System.out.println("总共" + count + "套房源!");
        System.out.println("==========================================");
        System.out.println("==========================================");
        System.out.println("==========================================");

        List<Map.Entry<String, kaifashangInfo>> list = new ArrayList<Map.Entry<String, kaifashangInfo>>(treemap.entrySet());
        Collections.sort(list,new Comparator<Map.Entry<String,kaifashangInfo>>() {
            // Ascending order; explicit comparison instead of subtraction so it can never overflow.
            @Override
            public int compare(Map.Entry<String, kaifashangInfo> o1, Map.Entry<String, kaifashangInfo> o2) {
                int left = o1.getValue().getJunjia();
                int right = o2.getValue().getJunjia();
                if (left < right) {
                    return -1;
                }
                return left == right ? 0 : 1;
            }
        });
        System.out.println("下面是滨江区的开放商房价排行");
        int houseCount=0;
        for (Map.Entry<String, kaifashangInfo> e: list) {
            houseCount+=e.getValue().getHousecount();
            System.out.println(e.getKey()+"  均价:"+e.getValue().getJunjia()+"元/平"+ " 房源共"+e.getValue().getHousecount()+"套");
        }
        System.out.println("滨江现有房源"+houseCount+"套");
    }

}

下面就是开发商的类了,我都是用拼音进行命名的,应该很容易看懂吧。

/**
 * Plain mutable holder for the figures aggregated under one name: the number
 * of listings, their summed total price and their mean unit price.
 *
 * <p>Created by Precious_Life on 2018/8/15.
 */
public class kaifashangInfo {
    /** Name the figures belong to. */
    public String name;
    /** Number of listings recorded for this name. */
    int housecount;
    /** Summed total price of the listings. */
    int zongjia;
    /** Mean unit price of the listings. */
    int junjia;

    /**
     * Creates a holder with all four values set.
     *
     * @param name       name the figures belong to
     * @param housecount number of listings
     * @param zongjia    summed total price
     * @param junjia     mean unit price
     */
    public kaifashangInfo(String name, int housecount, int zongjia, int junjia) {
        this.name = name;
        this.housecount = housecount;
        this.zongjia = zongjia;
        this.junjia = junjia;
    }

    /** @return the name */
    public String getName() {
        return this.name;
    }

    /** @param name the new name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the number of listings */
    public int getHousecount() {
        return this.housecount;
    }

    /** @param housecount the new number of listings */
    public void setHousecount(int housecount) {
        this.housecount = housecount;
    }

    /** @return the summed total price */
    public int getZongjia() {
        return this.zongjia;
    }

    /** @param zongjia the new summed total price */
    public void setZongjia(int zongjia) {
        this.zongjia = zongjia;
    }

    /** @return the mean unit price */
    public int getJunjia() {
        return this.junjia;
    }

    /** @param junjia the new mean unit price */
    public void setJunjia(int junjia) {
        this.junjia = junjia;
    }
}

对于上面的结果,就是实时输出了一下房源的信息,然后又对房源数据进行了分析,做了个均价排行。

截下图~~

这个图是各开发商的房源均价排行

把这个数据爬下来之后,有啥用么??没啥用。。。。。哈哈哈,查了下房价最低的这个聆涛苑到滨江这边的距离

要两个小时呢!!!!!!!!!!!

扫描二维码关注公众号,回复: 2828905 查看本文章

!!!!!!!!!!!!!!!!!

还是老老实实的拼吧,好好工作然后傍个富婆,房子就有了哈哈哈哈哈哈

猜你喜欢

转载自blog.csdn.net/q957967519/article/details/81673512