从网页内容中截取ICP备案号的SQL语句

首先获取网站内容,然后从内容中截取出ICP信息。

-- Step 0: seed ent_icp with a rough candidate cut out of the raw page text.
--
-- The candidate is a 35-character window that starts two characters before
-- the first occurrence of the marker (presumably so that a province prefix
-- such as the character in front of "ICP" is kept -- confirm with real data).
-- The later clean-up statements narrow this window down.
--
-- greatest(..., 1) clamps the start position: when the marker sits at
-- position 1 of web_text, "instr - 2" is -1, and Oracle's substr would then
-- count from the END of the text and return unrelated content.
--
-- Only rows with is_yellp null and no ent_icp yet are touched, so the second
-- (lower-case) statement only fills rows the upper-case pass did not match.

-- Pass 1: upper-case marker 'ICP'.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'ICP', 1) - 2, 1), 35)
 where is_yellp is null
   and web_text like '%ICP%'
   and ent_icp is null;

-- Pass 2: lower-case marker 'icp'.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'icp', 1) - 2, 1), 35)
 where is_yellp is null
   and web_text like '%icp%'
   and ent_icp is null;




-- Clean-up of ent_icp, part 1: candidates that contain '号-'
-- (a record number followed by a "-<n>" suffix).
--
-- The statements are order-dependent and must run top to bottom.
-- "ent_icp is not null" is not repeated in the predicates because a LIKE
-- test is never true for a NULL value.

-- Step 1: cut everything after '号-' plus the ONE character that follows it
-- (instr points at '号', so +2 covers '号', '-' and one more character).
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号-', 1) + 2)
 where ent_icp like '%号-%';

-- Step 2: drop leading text, keeping one character in front of the first
-- "ICP". The search is case-insensitive and guarded: step 0 also seeds
-- candidates from a lower-case 'icp' match, and for those an upper-case-only
-- instr returns 0, making the start position -1 so that substr keeps only
-- the LAST character of the value.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(upper(ent_icp), 'ICP', 1) - 1, 1))
 where ent_icp like '%号-%'
   and upper(ent_icp) like '%ICP%';

-- Step 3: when both an 'ICP证' and an 'ICP备' number are present, keep the
-- part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(ent_icp, 'ICP备', 1) - 1, 1))
 where ent_icp like '%ICP证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 4: same cut as step 3 for values that mention '许可证'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(ent_icp, 'ICP备', 1) - 1, 1))
 where ent_icp like '%许可证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 5: remove a leading label up to and including the first ':' when the
-- value still contains '备案'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 6: identical to step 5 and executed a second time.
-- NOTE(review): presumably meant to strip a second "label:" prefix; confirm
-- that this is not an accidental duplicate.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Clean-up of ent_icp, part 2: candidates that contain '号' but NOT '号-'
-- (record numbers without a "-<n>" suffix).
--
-- The statements are order-dependent and must run top to bottom.
-- "ent_icp is not null" is not repeated in the predicates because a LIKE
-- test is never true for a NULL value.

-- Step 7: cut everything after the first '号'. Values containing '证号' are
-- skipped here, since their first '号' is preceded by '证' rather than
-- closing the record number.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号', 1))
 where ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp not like '%证号%';

-- Step 8: for values that mention '许可证', keep the part starting one
-- character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(ent_icp, 'ICP备', 1) - 1, 1))
 where ent_icp like '%许可证%'
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 9: drop leading text, keeping one character in front of the first
-- "ICP". The search is case-insensitive and guarded: candidates seeded from a
-- lower-case 'icp' match contain no upper-case 'ICP', so an upper-case-only
-- instr returns 0, the start position becomes -1, and substr would keep only
-- the LAST character of the value.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(upper(ent_icp), 'ICP', 1) - 1, 1))
 where ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and upper(ent_icp) like '%ICP%';

-- Step 10: for values that still contain '证', keep the part starting one
-- character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, greatest(instr(ent_icp, 'ICP备', 1) - 1, 1))
 where ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%证%'
   and ent_icp like '%ICP备%';

-- Clean-up of ent_icp, part 3: cut off unrelated footer text that was caught
-- in the 35-character window. Each statement keeps only the text BEFORE the
-- first occurrence of its marker.
--
-- NOTE(review): when a marker is the very first character, the requested
-- length is 0; on Oracle substr then yields NULL, i.e. ent_icp is cleared
-- for that row -- confirm this is acceptable.

-- Step 11: public-security filing number marker.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '京公网安备', 1) - 1)
 where ent_icp like '%京公网安备%';

-- Step 12: "landline" marker. Runs before the shorter '电话' marker so that
-- the leading '固定' is removed as well.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '固定电话', 1) - 1)
 where ent_icp like '%固定电话%';

-- Step 13: "telephone" marker.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '电话', 1) - 1)
 where ent_icp like '%电话%';

-- Step 14: "hotline" marker.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '咨询热线', 1) - 1)
 where ent_icp like '%咨询热线%';

-- Step 15: "all rights reserved" marker.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '版权所有', 1) - 1)
 where ent_icp like '%版权所有%';

-- Step 16: "address" marker.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '地址', 1) - 1)
 where ent_icp like '%地址%';

-----------------------------
-- Final pass: remove every literal '?' from ent_icp.
-- NOTE(review): the '?' characters are presumably placeholders left by
-- characters that could not be decoded when the page was fetched -- confirm.
-- The LIKE filter restricts the update to rows that actually contain a '?'.
update z_beijing_all_web33
   set ent_icp = replace(ent_icp, '?', '')
 where ent_icp like '%?%';

猜你喜欢

转载自st4024589553.iteye.com/blog/2394500
ICP