Python code

# 引包
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the operating-hours page and parse it into a BeautifulSoup tree.
url = 'http://www.zzmetro.com/lines/query/operating_hours'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently scraping an error page.
response.raise_for_status()
html = response.text
# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed (and bs4 does not warn).
soup = BeautifulSoup(html, 'html.parser')

# Collect the displayed name of every metro line, in page order: the text of
# each <p> inside the first '.lines_div .lines_ul_div' element.
line_list_box = soup.select('.lines_div .lines_ul_div')[0]
name_arr = [p_tag.text for p_tag in line_list_box.find_all('p')]

# For every metro line, list its stations in row order and use the times in
# the timetable (last-train arrival times, per the original author's note) to
# estimate the travel time between neighbouring stations.
MISSING_TIME_MARKS = ('——', '---')  # cell text treated as "no time available"
UNKNOWN_GAP_MINUTES = 100  # sentinel recorded when neither column yields a gap
MINUTES_PER_DAY = 24 * 60
EDGE_COLUMNS = ['前一站', '后一站', '地铁线', '时间分钟']


def _clock_to_minutes(cell_text):
    """Convert an ``H:MM`` timetable cell to minutes since midnight.

    Returns ``None`` for placeholder cells and for any text that cannot be
    read as a clock time, so callers can simply ignore that column.
    """
    text = cell_text.strip().replace('：', ':')
    if text in MISSING_TIME_MARKS:
        return None
    parts = text.split(':')
    try:
        return int(parts[0]) * 60 + int(parts[1])
    except (IndexError, ValueError):
        return None


def _gap_minutes(first, second):
    """Return the distance between two clock times on a 24-hour dial.

    Measuring around the dial keeps a pair such as 23:50 -> 0:05 (or 1:05)
    at 15 (or 75) minutes instead of almost a full day.
    """
    delta = abs(second - first) % MINUTES_PER_DAY
    return min(delta, MINUTES_PER_DAY - delta)


zong_arr = []
for name, line_table in zip(name_arr, soup.select('.line_site .table_lx_div')):
    station_names = []
    station_gaps = []
    # Most recent valid time seen in each of the two time columns.
    # NOTE(review): columns 2 and 4 are presumably the two travel directions;
    # confirm against the page layout.
    prev_left = None
    prev_right = None

    # The first two <tr> rows are skipped, as in the original script.
    for row in line_table.find_all('tr')[2:]:
        cells = row.find_all('td')
        if len(cells) < 5:
            # Not a regular station row (e.g. merged cells); nothing to read.
            continue

        station = cells[0].text.strip()
        left_text = cells[2].text.strip()
        right_text = cells[4].text.strip()

        # Rows where both columns show '——' are dropped entirely.
        # NOTE(review): '---' is accepted as a placeholder below but does not
        # trigger this skip; kept as in the original, confirm that is intended.
        if left_text == '——' and right_text == '——':
            continue

        candidates = []
        left_now = _clock_to_minutes(left_text)
        if left_now is not None:
            if prev_left is not None:
                candidates.append(_gap_minutes(prev_left, left_now))
            prev_left = left_now

        right_now = _clock_to_minutes(right_text)
        if right_now is not None:
            if prev_right is not None:
                candidates.append(_gap_minutes(prev_right, right_now))
            prev_right = right_now

        # Use the smaller of the two column estimates; fall back to the
        # sentinel when no column has both a current and a previous time.
        gap = min(candidates, default=UNKNOWN_GAP_MINUTES)

        # Progress output: station, both raw cells, line name, chosen gap.
        print(station, left_text, right_text, name, gap)

        station_names.append(station)
        station_gaps.append(gap)

    # One record per pair of consecutive stations. The first station's gap is
    # dropped because it has no predecessor.
    edges = [
        (before, after, name, minutes)
        for before, after, minutes in zip(
            station_names, station_names[1:], station_gaps[1:]
        )
    ]
    if edges:
        zong_arr.append(pd.DataFrame(edges, columns=EDGE_COLUMNS))

# Stack all lines into a single table with a fresh 0..n-1 index.
if zong_arr:
    df = pd.concat(zong_arr, ignore_index=True)
else:
    # Nothing was scraped: still expose an (empty) table with the same columns.
    df = pd.DataFrame(columns=EDGE_COLUMNS)

Guess you like

Origin blog.csdn.net/u010095372/article/details/130761549