python3 爬取boss直聘职业分类数据(未完成)

import re
import urllib.request

# 爬取boss直聘职业分类数据
def subRule(fileName):
result = re.findall(r'<p class="menu-article">[\u4e00-\u9fa5]+</p>',fileName);
return result;

def subRule1(fileName):
result = re.findall(r'<h4>[\u4e00-\u9fa5]+</h4>',fileName);
return result;

def subRule2(fileName):
# [a-zA-Z]{0,10}|[A-Z]{0,10}[A-Za-z]{0,10}[/]{0,1}[\u4e00-\u9fa5]+[a-z]{0,10}|[A-Z]{0,10}
# 未匹配class="cur"的情况
result = re.findall(r'p[0-9]+/">.+</a>',fileName);
return result;

bossHtml = getHtml('https://www.zhipin.com/?ka=header-home');
print(bossHtml);

ruleResult = subRule(bossHtml.decode('utf-8'));
#print(ruleResult);
for i in ruleResult:
print(i);

ruleResult1 = subRule1(bossHtml.decode('utf-8'));
#print(ruleResult1);
for i in ruleResult1:
print(i);

ruleResult2 = subRule2(bossHtml.decode('utf-8'));
#print(ruleResult2);
for i in ruleResult2:
print(i);

猜你喜欢

转载自www.cnblogs.com/mxh-java/p/10748006.html