C#获取全国地区信息,精确到省市区乡镇居委会

闲来无聊,爬取了全国省、市、区、乡镇、居委会的信息,并存入到数据库。

以后做地址联动选择的时候可能用得着,这次可以精确到居委会

数据来源:国家统计局 2016年统计用区划代码和城乡划分代码(截止2016年07月31日)

http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html



具体代码如下,写得比较随意:

using AngleSharp.Parser.Html;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace CrawlerArea
{
    class Program
    {
        /// <summary>
        /// Entry point. Runs the committee-level crawl: for every stored area whose
        /// code is at least 9 characters long, builds the page URL from the code,
        /// hands it to <c>getCommitteeInfo</c>, and prints progress to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Common prefix of every page requested by this program.
            const string baseUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016";
            // Pause between two requests, in milliseconds.
            const int requestDelayMs = 200;

            Console.WriteLine(DateTime.Now);

            // The crawl was run one administrative level at a time. The earlier stages
            // are kept below (commented out) for reference; only the last one is active.

            // Stage 1: provinces
            //f("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html");
            // Stage 2: cities
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.ToList();
            //foreach (var item in data)
            //{
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}.html", item.Code);
            //    getCityInfo(url, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);

            // Stage 3: districts / counties
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.ToList();
            //foreach (var item in data)
            //{
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}/{1}.html", item.PedarId, item.Code);
            //    getCountyInfo(url, item.PedarId, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);
            // Stage 4: streets / towns
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.Where(t => t.PedarId >= 1000).ToList();
            //foreach (var item in data)
            //{
            //    string temp = item.Code.ToString();
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}/{1}/{2}.html", temp.Substring(0, 2), temp.Substring(2, 2), item.Code);
            //    getStreetInfo(url, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);

            // Stage 5: village / neighbourhood committees
            // The context is disposed once the loop is done instead of being left
            // for process teardown.
            using (AreaDBEntities areaDBEntities = new AreaDBEntities())
            {
                // NOTE(review): rows inserted by getCommitteeInfo presumably carry codes
                // longer than 9 characters, so a re-run after a partial crawl would also
                // select them here - confirm whether "== 9" is the intended filter.
                var data = areaDBEntities.AreaInfoes.Where(t => t.Code.Length >= 9).ToList();
                foreach (var item in data)
                {
                    string areaCode = item.Code;
                    // The page path nests the first three two-character groups of the code
                    // as directories: {base}/{c0c1}/{c2c3}/{c4c5}/{code}.html
                    string url = string.Format(
                        "{0}/{1}/{2}/{3}/{4}.html",
                        baseUrl,
                        areaCode.Substring(0, 2),
                        areaCode.Substring(2, 2),
                        areaCode.Substring(4, 2),
                        areaCode);

                    getCommitteeInfo(url, areaCode);
                    Console.WriteLine(areaCode + "----" + item.Name);

                    // Throttle so the server is not hit with back-to-back requests.
                    System.Threading.Thread.Sleep(requestDelayMs);
                }
            }

            Console.WriteLine(DateTime.Now);
            Console.WriteLine("OK");
            Console.ReadKey();
        }
        /// <summary>
        /// Downloads one page, reads every element matching <c>.villagetr</c>, and
        /// stores one <c>AreaInfo</c> row per match (committee level).
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="code">
        /// Code of the parent area. It is parsed with <c>int.Parse</c> and written to
        /// <c>PedarId</c> of every inserted row, so it must be a valid 32-bit integer
        /// whenever the page yields at least one row.
        /// </param>
        private static void getCommitteeInfo(string url, string code)
        {
            var htmlString = HttpGet(url);
            if (string.IsNullOrEmpty(htmlString))
            {
                // HttpGet returns null when the request fails; there is nothing to parse.
                return;
            }

            var rows = new HtmlParser().Parse(htmlString).QuerySelectorAll(".villagetr");

            List<Node> list = new List<Node>();
            foreach (var row in rows)
            {
                var cells = row.Children.ToList();
                if (cells.Count < 3)
                {
                    // Cells 0 and 2 are read below; skip rows that do not have them.
                    continue;
                }

                // First cell is stored as the code, third cell as the name.
                list.Add(new Node { code = cells[0].InnerHtml, area = cells[2].InnerHtml });
            }

            if (list.Count == 0)
            {
                // Nothing to insert, so no database round trip is needed.
                return;
            }

            // Same value for every row of this page: parse it once.
            int parentId = int.Parse(code);

            using (AreaDBEntities areaDBEntities = new AreaDBEntities())
            {
                foreach (var item in list)
                {
                    AreaInfo areaInfo = new AreaInfo();
                    areaInfo.Code = item.code;
                    areaInfo.Name = item.area;
                    areaInfo.PedarId = parentId;
                    areaDBEntities.AreaInfoes.Add(areaInfo);
                }
                // One save per page keeps all rows of a page in a single batch.
                areaDBEntities.SaveChanges();
            }
        }

        /// <summary>
        /// 街道
        /// </summary>
        /// <param name="url"></param>
        /// <param name="code"></param>
        //private static void getStreetInfo(string url, int? code)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".towntr")
        //        .Select(t => t)
        //        .ToList();

        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();

        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }

        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code1 = item.code.Substring(item.code.IndexOf("/") + 1, 9);
        //            areaInfo.Code = int.Parse(code1);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = code;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        //private static void getCountyInfo(string url, int? PedarId, int? code)
        //{
        //    if (PedarId == null) return;
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".countytr")
        //        .Select(t => t)
        //        .ToList();

        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();

        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }

        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code1 = item.code.Substring(item.code.IndexOf("/") + 1, 6);
        //            areaInfo.Code = int.Parse(code1);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = code;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        //private static void getCityInfo(string url, int? PedarId)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".citytr")
        //        .Select(t => t)
        //        .ToList();

        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();

        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }

        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code = item.code.Substring(item.code.IndexOf("/") + 1, 4);
        //            areaInfo.Code = int.Parse(code);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = PedarId;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        /// <summary>
        /// Sends an HTTP GET request and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL to request.</param>
        /// <returns>
        /// The response body, or <c>null</c> if creating the request, sending it, or
        /// reading the response threw any exception (the failure is written to
        /// standard error).
        /// </returns>
        public static string HttpGet(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "GET";
                request.Accept = "text/html, application/xhtml+xml, */*";
                // No Content-Type header: a GET request carries no body to describe.

                // Dispose the response (and its stream) deterministically so the
                // underlying connection is released after every call.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                // NOTE(review): Encoding.Default is the machine's ANSI code page, so the
                // text is only decoded correctly when that matches the page's charset -
                // confirm the charset and consider naming it explicitly.
                using (StreamReader reader = new StreamReader(body, Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort contract kept for callers: report the failure and return
                // null instead of throwing.
                Console.Error.WriteLine("HttpGet failed for " + url + ": " + ex.Message);
                return null;
            }
        }
        ////得到省的信息
        //static void f(string url)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".provincetr")
        //        .Select(t => t)
        //        .ToList();

        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();

        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }
        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    foreach (var item in list)
        //    {
        //        AreaInfo areaInfo = new AreaInfo();
        //        Console.WriteLine(item.code + "----" + item.area);
        //        areaInfo.Code = int.Parse(item.code.Substring(0, item.code.IndexOf(".")));
        //        areaInfo.Name = item.area;
        //        areaInfo.PedarId = null;
        //        areaDBEntities.AreaInfoes.Add(areaInfo);
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //    //}
        //}
    }
    /// <summary>
    /// Simple holder for one scraped entry: a code string paired with an area name.
    /// </summary>
    internal class Node
    {
        /// <summary>Code text extracted from the page for this entry.</summary>
        public string code { get; set; }

        /// <summary>Area name extracted from the page for this entry.</summary>
        public string area { get; set; }
    }

    /// <summary>
    /// Holder for a single string value. Not referenced by any code visible in this file.
    /// </summary>
    internal class td
    {
        /// <summary>The stored string value.</summary>
        public string td1 { get; set; }
    }
}

稍后会将生成的数据库脚本分享出来,有需要的可以私聊我。

猜你喜欢

转载自blog.csdn.net/qq_32688731/article/details/79835339
今日推荐