Python3——读取本机谷歌浏览器访问过的URL历史记录信息
1、本机谷歌浏览器的历史记录信息:
在该目录下:C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default
2、谷歌浏览器的历史记录使用sqlite3数据库存储数据:
使用sqlite3数据库浏览工具查看,如下:
3、读取URL的历史记录信息,python3脚本代码:
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 22 20:00:02 2018
@author: Administrator
"""
import os
import sqlite3
import operator
from collections import OrderedDict
if __name__ == '__main__':
    # Dump every row of the `urls` table of a Chrome history database,
    # one tab-separated line per row.

    # Directory that holds the user's Chrome profile data (history included).
    data_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default'
    # NOTE(review): 'history1' is presumably a copy of Chrome's own history
    # database, made so it can be read while the browser is running -- confirm.
    history_db = os.path.join(data_path, 'history1')

    # Query the database contents.
    conn = sqlite3.connect(history_db)
    try:
        cursor = conn.cursor()
        select_statement_queryall = 'SELECT * FROM urls;'
        # To empty the table instead, execute 'DELETE FROM urls;' and then
        # call conn.commit() -- data modifications must be committed.
        cursor.execute(select_statement_queryall)
        results = cursor.fetchall()
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    # Labels for the first seven columns of each row, in column order.  The
    # spelling 'hiddlen' is kept so the printed output stays unchanged.
    labels = (
        'ID',
        'URL',
        'Title',
        'visit_count',
        'typed_count',
        'last_visit_time',
        'hiddlen',
    )
    for d in results:
        # Every field is rendered as "<label>: <value>" followed by a tab.
        print(''.join('{}: {}\t'.format(label, value) for label, value in zip(labels, d)))
4、脚本代码运行情况:
ID: 1 URL: http://baojia.3hk.cn/301 Title: baojia.3hk.cn visit_count: 0 typed_count: 0 last_visit_time: 0 hiddlen: 0
ID: 2 URL: http://blog.csdn.net/SKI_12/article/category/6689258 Title: 提权 - 闲云~ - CSDN博客 visit_count: 0 typed_count: 0 last_visit_time: 0 hiddlen: 0
ID: 3 URL: http://blog.csdn.net/SKI_12/article/details/72972238?locationNum=2&fps=1 Title: 关于《Python绝技:运用Python成为顶级黑客》的学习笔记 - 闲云~ - CSDN博客 visit_count: 0 typed_count: 0 last_visit_time: 0 hiddlen: 0
.........................................................
.........................................................
.........................................................
ID: 276 URL: http://www.soyunpan.com/search/%E5%B0%91%E5%84%BF%E6%8A%98%E7%BA%B8-0-%E5%85%A8%E9%83%A8-0.html Title: 少儿折纸 百度云资源网盘下载_搜云盘 visit_count: 1 typed_count: 0 last_visit_time: 13174154433463515 hiddlen: 0
ID: 277 URL: http://www.soyunpan.com/file/27104757.html Title: 折纸 百度云网盘资源下载_百度云盘 visit_count: 1 typed_count: 0 last_visit_time: 13174154436053165 hiddlen: 0
5、统计不同网站访问次数
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 22 20:00:02 2018
@author: Administrator
"""
import os
import sqlite3
import operator
from collections import OrderedDict
def parse(url):
    """Return the host part of *url* with a leading ``www.`` removed.

    The host is the text between the first ``//`` and the next ``/`` (or the
    end of the string when there is no further ``/``).

    Args:
        url: URL string such as ``'http://www.example.com/path'``.

    Returns:
        The host string, e.g. ``'example.com'``.  When *url* contains no
        ``//`` separator, ``'URL format error!!'`` is printed and ``None``
        is returned.
    """
    _, separator, remainder = url.partition('//')
    if not separator:
        print('URL format error!!')
        return None
    host = remainder.split('/', 1)[0]
    # Strip only a leading "www."; a blanket replace would also mangle hosts
    # that merely contain that text (e.g. "awww.com" -> "acom").
    prefix = 'www.'
    if host.startswith(prefix):
        host = host[len(prefix):]
    return host
if __name__ == '__main__':
    # Count how many visit rows each site has in a Chrome history database
    # and print the totals, most-visited site first.

    # Directory that holds the user's Chrome profile data (history included).
    data_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default'
    # NOTE(review): 'history1' is presumably a copy of Chrome's own history
    # database, made so it can be read while the browser is running -- confirm.
    history_db = os.path.join(data_path, 'history1')

    # Query the database contents.
    conn = sqlite3.connect(history_db)
    try:
        cursor = conn.cursor()
        # The join produces one result row for every `visits` entry that
        # references a URL, so counting rows per site counts visits.
        select_statement_query = 'SELECT urls.url,urls.visit_count FROM urls,visits WHERE urls.id=visits.url;'
        cursor.execute(select_statement_query)
        results = cursor.fetchall()
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    sites_count = {}  # site name -> number of result rows seen for it
    for url, _visit_count in results:
        site = parse(url)
        if site is None:
            # parse() already reported the malformed URL; leave it out of
            # the tally rather than counting it under a None key.
            continue
        sites_count[site] = sites_count.get(site, 0) + 1

    # Order by count, highest first; sites with equal counts keep the order
    # in which they were first encountered.
    sites_count_sorted = OrderedDict(sorted(sites_count.items(), key=operator.itemgetter(1), reverse=True))
    print(sites_count_sorted)
6、统计访问次数的运行情况
OrderedDict([('sogou.com', 33), ('blog.csdn.net', 17), ('hao123.com', 14), ('btmayis.com', 10), ('v.qq.com', 10), ('iqiyi.com', 9), ('56.com', 6), ('mp.csdn.net', 4), ('baike.sogou.com', 4), ('37.com', 4), ('daojia.jd.com', 4), ('rarbt.com', 4), ('csdn.net', 3), ('tv.sohu.com', 3), ('icbc.com.cn', 3), ('guazi.com', 3), ('autohome.com.cn', 3), ('yueba80.com', 3), ('taobao.com', 3), ('news.163.com', 3), ('soyunpan.com', 3), ('gocklaboggjfkolaknpbhddbaopcepfp', 2), ('passport.csdn.net', 2), ('xiazaiba.com', 2), ('crsky.com', 2), ('tudou.com', 2), ('video.tudou.com', 2), ('new-play.tudou.com', 2), ('huancaicp.com', 2), ('union-click.jd.com', 2), ('jd.com', 2), ('esf.sz.fang.com', 2), ('baiduyunpan.com', 1), ('bbs.csdn.net', 1), ('youku.com', 1), ('live.youku.com', 1), ('gongyi.youku.com', 1), ('fun.youku.com', 1), ('news.sogou.com', 1), ('i.56.com', 1), ('count.crsky.com', 1), ('pinshan.com', 1), ('ld.sogou.com', 1), ('bb.qq.com', 1), ('bb.news.qq.com', 1), ('hao.880332.net', 1), ('cac.gov.cn', 1), ('epass.icbc.com.cn', 1), ('sempage.guazi.com', 1), ('mir.37.com', 1), ('c.autohome.com.cn', 1), ('e.cn.miaozhen.com', 1), ('mall.autohome.com.cn', 1), ('huancai.com', 1), ('channel.jd.com', 1), ('neiyi.taobao.com', 1), ('guang.taobao.com', 1), ('game.taobao.com', 1), ('qq.com', 1), ('163.com', 1), ('renjian.163.com', 1), ('fang.com', 1), ('sz.fang.com', 1), ('fangjia.fang.com', 1), ('home.sz.fang.com', 1), ('adshow.fang.com', 1), ('caifucheng.fang.com', 1), ('newhouse.sz.fang.com', 1), ('bbs.miercn.com', 1), ('5566.net', 1), ('douyu.com', 1), ('bitauto.com', 1), ('shenzhen.bitauto.com', 1), ('car.bitauto.com', 1)])