问答系统对问题分类

对问答系统中的问题进行分类,可以让我们更清楚地了解问题的类别,从而更容易针对某一类问题进行建模。

我把问题分为6类:“5W”(即where、when、who、why、what)以及other。

# encoding=utf-8
import xlrd
import xlwt


def writeexcel(what, where, when, who, why, tu, filename='new.xls'):
    """Write each question category to its own sheet of an .xls workbook.

    One sheet is created per category, in the order what, where, when,
    who, why, other. Cell (0, 0) of every sheet holds the category name
    and the questions follow in column 0 starting at row 1.

    Args:
        what: Questions classified as "what".
        where: Questions classified as "where".
        when: Questions classified as "when".
        who: Questions classified as "who".
        why: Questions classified as "why".
        tu: Questions that matched no category; written to the
            'other' sheet.
        filename: Path of the workbook to save. Defaults to 'new.xls'.
    """
    workbook = xlwt.Workbook()
    categories = (
        ('what', what),
        ('where', where),
        ('when', when),
        ('who', who),
        ('why', why),
        ('other', tu),
    )
    for title, questions in categories:
        sheet = workbook.add_sheet(title, cell_overwrite_ok=True)
        # Write the header once, before the rows, so that a category
        # with no questions still gets a labelled sheet.
        sheet.write(0, 0, title)
        for row, question in enumerate(questions, start=1):
            sheet.write(row, 0, question)
    # Save the workbook to disk.
    workbook.save(filename)

# Keyword table: a question is assigned to a category when it contains
# at least one of that category's substrings.
_CATEGORY_KEYWORDS = {
    'what': ('么', '哪几', '如何', '哪些', '怎么', '哪个', '多少', '怎样', '哪种'),
    'why': ('原因', '可否', '能否', '是否', '吗', '应否', '是不是', '会不会',
            '有没有', '要不要', '能不能'),
    'who': ('谁', '什么人', '哪一方'),
    'where': ('哪里', '什么地点'),
    'when': ('多久', '履行期', '多长', '时候', '时间', '何时', '几年', '哪一天',
             '办案时限'),
}


def _classify_questions(questions):
    """Split *questions* into (what, where, when, who, why, other) lists.

    Each of the five keyword categories is de-duplicated and keeps the
    order in which questions first appear in the input. A question may
    appear in several of when/where/who/why, but "what" only keeps
    questions that matched none of those four. "other" holds every input
    question that matched no category at all (duplicates are kept).
    """
    matched = {}
    for name, keywords in _CATEGORY_KEYWORDS.items():
        hits = (q for q in questions if any(key in q for key in keywords))
        # dict.fromkeys removes duplicates while preserving first-seen
        # order, so the output is deterministic across runs.
        matched[name] = list(dict.fromkeys(hits))

    # "what" keywords are the most generic, so anything also claimed by
    # a more specific category is dropped from "what".
    specific = set().union(
        matched['when'], matched['where'], matched['who'], matched['why'])
    matched['what'] = [q for q in matched['what'] if q not in specific]

    # Questions that matched no category at all form their own class.
    classified = specific.union(matched['what'])
    other = [q for q in questions if q not in classified]
    return (matched['what'], matched['where'], matched['when'],
            matched['who'], matched['why'], other)


def driver(file):
    """Classify the questions stored in an .xls workbook and export them.

    Reads the first sheet of *file*, takes the value at column index 1
    of every row except row 0, classifies those values by keyword and
    passes the resulting lists to ``writeexcel``.

    Args:
        file: Path of the workbook to read.
    """
    # Read the xls file. 'gbk' is a registered Python codec for
    # Simplified Chinese; 'gdk' is not a valid codec name.
    data = xlrd.open_workbook(file, encoding_override='gbk')
    table = data.sheets()[0]
    nrows = table.nrows  # number of rows in the sheet

    questions = []
    for i in range(1, nrows):
        cell = table.row_values(i)[1]
        # Non-text cells (e.g. numbers) come back as non-str values and
        # would break the substring tests, so normalise them to text.
        questions.append(cell if isinstance(cell, str) else str(cell))

    what, where, when, who, why, other = _classify_questions(questions)
    writeexcel(what, where, when, who, why, other)


if __name__ == '__main__':
    # Run the classification only when executed as a script, so that
    # importing this module does not trigger any file I/O.
    driver('dateset.xls')


(1)、如果您在阅读博客时遇到问题或者不理解的地方,可以联系我,互相交流、互相进步;
(2)、本人业余时间可以承接毕业设计和各种小项目,如系统构建、成立网站、数据挖掘、机器学习、深度学习等。有需要的加QQ:1143948594,备注“csdn项目”。

猜你喜欢

转载自blog.csdn.net/qq_32113189/article/details/86229361