python3——读取本机谷歌浏览器访问过的URL历史记录信息

python3——读取本机谷歌浏览器访问过的URL历史记录信息

1、本机谷歌浏览器历史记录文件的存放位置:
历史记录文件位于该目录下:C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default

这里写图片描述

2、谷歌浏览器的历史记录是使用sqlite3数据库存储的:
使用sqlite3数据库浏览工具查看如下

这里写图片描述

3、读取URL的历史记录信息,python3脚本代码:

# -*- coding: utf-8 -*-
"""
Created on Fri Jun 22 20:00:02 2018
@author: Administrator
"""
import os
import sqlite3
import operator
from collections import OrderedDict

if __name__ == '__main__':
    # Dump every row of the `urls` table of a Chrome history database,
    # printing the first seven columns of each row on one tab-separated line.

    # Folder that holds the user's Chrome profile data.
    data_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default'
    # NOTE(review): Chrome's own history file is usually named 'History';
    # 'history1' is presumably a copy of it -- confirm before running.
    history_db = os.path.join(data_path, 'history1')

    # Labels printed in front of the first seven columns of each row.
    # The spelling 'hiddlen' is kept so the output stays unchanged.
    column_labels = ('ID', 'URL', 'Title', 'visit_count', 'typed_count',
                     'last_visit_time', 'hiddlen')

    # To clear the table instead of reading it, execute 'DELETE FROM urls;'
    # and then call conn.commit(); without the commit the change is not saved.
    conn = sqlite3.connect(history_db)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM urls;')
        results = cursor.fetchall()
    finally:
        # Always release the database file, even when the query fails.
        conn.close()

    for row in results:
        # Each field is rendered as "<label>: <value>\t", including the last.
        print(''.join('{}: {}\t'.format(label, value)
                      for label, value in zip(column_labels, row)))
4、脚本代码运行情况:

ID: 1   URL: http://baojia.3hk.cn/301   Title: baojia.3hk.cn    visit_count: 0  typed_count: 0  last_visit_time: 0      hiddlen: 0      
ID: 2   URL: http://blog.csdn.net/SKI_12/article/category/6689258       Title: 提权 - 闲云~ - CSDN博客        visit_count: 0  typed_count: 0  last_visit_time: 0      hiddlen: 0      
ID: 3   URL: http://blog.csdn.net/SKI_12/article/details/72972238?locationNum=2&fps=1   Title: 关于《Python绝技:运用Python成为顶级黑客》的学习笔记 - 闲云~ - CSDN博客  visit_count: 0  typed_count: 0  last_visit_time: 0      hiddlen: 0           
.........................................................
.........................................................
.........................................................           
ID: 276 URL: http://www.soyunpan.com/search/%E5%B0%91%E5%84%BF%E6%8A%98%E7%BA%B8-0-%E5%85%A8%E9%83%A8-0.html    Title: 少儿折纸 百度云资源网盘下载_搜云盘       visit_count: 1  typed_count: 0  last_visit_time: 13174154433463515      hiddlen: 0      
ID: 277 URL: http://www.soyunpan.com/file/27104757.html Title: 折纸 百度云网盘资源下载_百度云盘        visit_count: 1  typed_count: 0  last_visit_time: 13174154436053165      hiddlen: 0 
5、统计不同网站访问次数

# -*- coding: utf-8 -*-
"""
Created on Fri Jun 22 20:00:02 2018

@author: Administrator
"""
import os
import sqlite3
import operator
from collections import OrderedDict

def parse(url):
    """Return the host part of *url* with a leading ``www.`` removed.

    The host is the text between the first ``//`` and the next ``/``.

    Args:
        url: URL string such as ``'http://www.example.com/page'``.

    Returns:
        The host (for example ``'example.com'``), or ``None`` when *url*
        contains no ``//`` separator. In that case an error message is
        printed as well.
    """
    _scheme, separator, remainder = url.partition('//')
    if not separator:
        print('URL format error!!')
        return None

    host = remainder.split('/', 1)[0]
    # Strip only a leading "www." label; a "www." occurring elsewhere in the
    # host (e.g. "awww.example.com") is part of the name and must be kept.
    if host.startswith('www.'):
        host = host[len('www.'):]
    return host

if __name__ == '__main__':
    # Count how many recorded visits each site has in a Chrome history
    # database and print the sites ordered from most to least visited.

    # Folder that holds the user's Chrome profile data.
    data_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\Default'
    # NOTE(review): Chrome's own history file is usually named 'History';
    # 'history1' is presumably a copy of it -- confirm before running.
    history_db = os.path.join(data_path, 'history1')

    # Joining `urls` with `visits` yields one row per recorded visit, so
    # counting rows per site gives the number of visits to that site.
    select_statement_query = 'SELECT urls.url,urls.visit_count FROM urls,visits WHERE urls.id=visits.url;'

    conn = sqlite3.connect(history_db)
    try:
        cursor = conn.cursor()
        cursor.execute(select_statement_query)
        results = cursor.fetchall()
    finally:
        # Always release the database file, even when the query fails.
        conn.close()

    sites_count = {}  # site host -> number of visit rows
    for url, _visit_count in results:
        site = parse(url)
        if site is None:
            # parse() already reported the malformed URL; do not count it
            # under a bogus None key.
            continue
        sites_count[site] = sites_count.get(site, 0) + 1

    # Sort by visit count, highest first; ties keep their first-seen order.
    sites_count_sorted = OrderedDict(
        sorted(sites_count.items(), key=operator.itemgetter(1), reverse=True))
    print(sites_count_sorted)
6、统计访问次数的运行情况

OrderedDict([('sogou.com', 33), ('blog.csdn.net', 17), ('hao123.com', 14), ('btmayis.com', 10), ('v.qq.com', 10), ('iqiyi.com', 9), ('56.com', 6), ('mp.csdn.net', 4), ('baike.sogou.com', 4), ('37.com', 4), ('daojia.jd.com', 4), ('rarbt.com', 4), ('csdn.net', 3), ('tv.sohu.com', 3), ('icbc.com.cn', 3), ('guazi.com', 3), ('autohome.com.cn', 3), ('yueba80.com', 3), ('taobao.com', 3), ('news.163.com', 3), ('soyunpan.com', 3), ('gocklaboggjfkolaknpbhddbaopcepfp', 2), ('passport.csdn.net', 2), ('xiazaiba.com', 2), ('crsky.com', 2), ('tudou.com', 2), ('video.tudou.com', 2), ('new-play.tudou.com', 2), ('huancaicp.com', 2), ('union-click.jd.com', 2), ('jd.com', 2), ('esf.sz.fang.com', 2), ('baiduyunpan.com', 1), ('bbs.csdn.net', 1), ('youku.com', 1), ('live.youku.com', 1), ('gongyi.youku.com', 1), ('fun.youku.com', 1), ('news.sogou.com', 1), ('i.56.com', 1), ('count.crsky.com', 1), ('pinshan.com', 1), ('ld.sogou.com', 1), ('bb.qq.com', 1), ('bb.news.qq.com', 1), ('hao.880332.net', 1), ('cac.gov.cn', 1), ('epass.icbc.com.cn', 1), ('sempage.guazi.com', 1), ('mir.37.com', 1), ('c.autohome.com.cn', 1), ('e.cn.miaozhen.com', 1), ('mall.autohome.com.cn', 1), ('huancai.com', 1), ('channel.jd.com', 1), ('neiyi.taobao.com', 1), ('guang.taobao.com', 1), ('game.taobao.com', 1), ('qq.com', 1), ('163.com', 1), ('renjian.163.com', 1), ('fang.com', 1), ('sz.fang.com', 1), ('fangjia.fang.com', 1), ('home.sz.fang.com', 1), ('adshow.fang.com', 1), ('caifucheng.fang.com', 1), ('newhouse.sz.fang.com', 1), ('bbs.miercn.com', 1), ('5566.net', 1), ('douyu.com', 1), ('bitauto.com', 1), ('shenzhen.bitauto.com', 1), ('car.bitauto.com', 1)])

猜你喜欢

转载自blog.csdn.net/xwbk12/article/details/80781732