知识图谱构建7——基于REFO的简单知识问答(KBQA)

知识图谱构建7——基于REFO的简单知识问答(KBQA)

实例结构:
在这里插入图片描述

#!/bin/bash
# Export actor and movie names from MySQL and turn them into jieba user
# dictionaries (one "<word> <tag>" entry per line).
#
# NOTE: put this script (get_dict.sh) in
# C:/ProgramData/MySQL/MySQL Server 5.7/Uploads and run it from there: that is
# the directory the SELECT ... INTO OUTFILE statements write to, and the awk /
# rm steps below use paths relative to it.

# MySQL connection settings.
Host=127.0.0.1  # or the database server address (not used by the local connection below)
User=root
PW=数据库密码  # placeholder: replace with the real database password

# Local connection. The here-document feeds the SQL statements to mysql; the
# closing EOF has to be alone on its line, otherwise the shell never sees the
# end of the here-document.
mysql -u"$User" -p"$PW" <<EOF
use kg_movie;
#show tables;
select actor_chName from actor into outfile 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/actor_Name.txt';
select movie_chName from movie into outfile 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movie_Name.txt';
EOF

# Append the "nr" / "nz" tag for jieba to every exported line:
#   nr marks an actor name
#   nz marks a movie title
awk '{print $0 " nr"}' actor_Name.txt > actorName.txt
awk '{print $0 " nz"}' movie_Name.txt > movieName.txt

# Move the dictionaries to G:\PyCharmWP\kgMovie\KBQA\patternREFO\data
mv actorName.txt G:/PyCharmWP/kgMovie/KBQA/patternREFO/data/
mv movieName.txt G:/PyCharmWP/kgMovie/KBQA/patternREFO/data/

# Remove the intermediate exports (actor_Name.txt and movie_Name.txt).
rm *_Name.txt

将数据库中的字段数据导出时出现如下secure-file-priv选项问题:

ERROR 1290 (HY000) at line 4: The MySQL server is running with the --secure-file-priv option so it cannot execute this statement

参考:mysql5.7导出数据提示--secure-file-priv选项问题的解决方法
https://blog.csdn.net/fdipzone/article/details/78634992

# encoding=utf-8

import jieba
import jieba.posseg as pseg


class Word(object):
    """A segmented token together with its part-of-speech tag."""

    def __init__(self, token, pos):
        # Both values are stored exactly as supplied by the caller.
        self.token, self.pos = token, pos


class Tagger:
    """Part-of-speech tagger built on jieba plus user-supplied dictionaries."""

    def __init__(self, dict_paths):
        """Register every dictionary file in ``dict_paths`` with jieba.

        :param dict_paths: iterable of dictionary file paths, each handed to
            ``jieba.load_userdict``.
        """
        for dict_path in dict_paths:
            jieba.load_userdict(dict_path)

    @staticmethod
    def get_word_objects(sentence):
        # type: (str) -> list
        """Segment ``sentence`` and wrap each (word, tag) pair in a ``Word``.

        :param sentence: natural-language text to segment.
        :return: list of ``Word`` objects; ``token`` holds the UTF-8 encoded
            word and ``pos`` the tag yielded by ``pseg.cut``.
        """
        word_objects = []
        for text, flag in pseg.cut(sentence):
            word_objects.append(Word(text.encode('utf-8'), flag))
        return word_objects

# TODO 用于测试


if __name__ == '__main__':
    # Interactive smoke test: tag every line typed by the user and print the
    # resulting (token, part-of-speech) pairs.
    tagger = Tagger(['../data/actorName.txt', '../data/movieName.txt'])
    while True:
        print("请输入:")
        try:
            s = input()
        except EOFError:
            # stdin was closed: leave the loop instead of dying with a traceback.
            break
        # Segment once and reuse the result; the sentence used to be tagged
        # twice, with the first result thrown away.
        for word in tagger.get_word_objects(s):
            print(word.token.decode('utf8'), word.pos)
# 测试结果:
# 请输入:
# 周星驰是谁
# 周星驰 nr
# 是 v
# 谁 r
  • 将自然语言转换为SPARQL模板
    rules.py:
#!/usr/bin/env python
# coding=utf-8
# 设置问题模板,为每个模板设置对应的SPARQL语句
import re

# SPARQL config
from refo.refo import Predicate, finditer, Star, Any

# Prefix declarations placed in front of every generated query.
SPARQL_PREAMBLE = (
    u"  \n"
    u"PREFIX : <http://www.kg_movie.com#>\n"
    u"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
    u"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
)

# Query skeleton filled in with str.format(). The doubled braces are escaped
# literals: they render as the single "{" / "}" that delimit the WHERE clause.
SPARQL_TEM = (
    u"{preamble}\n"
    u"SELECT DISTINCT {select} WHERE {{\n"
    u"{expression}\n"
    u"}}\n"
)

# One level of indentation for the body of the WHERE clause.
INDENT = "    "


class W(Predicate):
    """Object-oriented regex for words.

    A ``W`` accepts a word object when both the word's text and its
    part-of-speech tag match the configured regular expressions.
    """

    def __init__(self, token=".*", pos=".*"):
        """Compile the token and part-of-speech patterns.

        :param token: regular expression the word's text must match.
        :param pos: regular expression the word's POS tag must match.
        """
        # Wrap each pattern in a non-capturing group so the trailing "$"
        # anchors the whole expression. With plain concatenation a pattern
        # such as "a|b" became "a|b$", leaving its first alternative
        # unanchored.
        self.token = re.compile("(?:" + token + ")$")
        self.pos = re.compile("(?:" + pos + ")$")
        super(W, self).__init__(self.match)

    def match(self, word):
        """Test ``word`` against both patterns.

        :param word: object exposing ``token`` (UTF-8 bytes or text) and ``pos``.
        :return: the POS match object when both patterns match, else ``None``.
        """
        token = word.token
        # Tokens arrive as UTF-8 bytes; decode them before matching. Text
        # tokens are accepted unchanged.
        if isinstance(token, bytes):
            token = token.decode('utf-8')
        m1 = self.token.match(token)
        m2 = self.pos.match(word.pos)
        return m1 and m2


class Rule(object):
    """Pairs a match condition with the action applied to whatever it matched."""

    def __init__(self, condition=None, action=None):
        """Store the condition and the action; both are required.

        :param condition: pattern handed to ``finditer`` when the rule is applied.
        :param action: callable that receives the list of matched items.
        :raises AssertionError: if either argument is missing or falsy.
        """
        assert condition and action
        self.condition = condition
        self.action = action

    def apply(self, sentence):
        """Run the condition over ``sentence`` and hand the matches to the action.

        :param sentence: sequence of word objects to search.
        :return: whatever ``action`` returns for the collected matches.
        """
        matches = []
        for m in finditer(self.condition, sentence):
            # span() gives the slice of the sentence covered by this match.
            i, j = m.span()
            matches.extend(sentence[i:j])
        return self.action(matches)


def _escape_sparql_literal(text):
    """Escape ``text`` for use inside a single-quoted SPARQL string literal."""
    # Backslashes go first so the escapes added afterwards are not doubled.
    return (text.replace(u"\\", u"\\\\")
                .replace(u"'", u"\\'")
                .replace(u"\n", u"\\n")
                .replace(u"\r", u"\\r"))


def _first_entity_query(words, pos_tags, expression_template):
    """Build a query for the first word whose POS tag is in ``pos_tags``.

    :param words: iterable of word objects exposing ``token`` and ``pos``.
    :param pos_tags: POS tags that identify the entity the question is about.
    :param expression_template: WHERE-clause body with a ``{person}`` slot
        for the entity name.
    :return: the complete SPARQL query, or ``None`` if no word qualifies.
    """
    for w in words:
        if w.pos not in pos_tags:
            continue
        token = w.token
        if isinstance(token, bytes):
            token = token.decode("utf-8")
        # The token comes from user input, so it is escaped before being
        # placed between the single quotes of the literal.
        e = expression_template.format(person=_escape_sparql_literal(token))
        return SPARQL_TEM.format(preamble=SPARQL_PREAMBLE,
                                 select=u"?x0",
                                 expression=INDENT + e)
    return None


# "Who is <person>?" questions
def who_is_question(x):
    """Return a query for the biography of the first person in ``x``.

    :param x: list of matched word objects.
    :return: SPARQL query string, or ``None`` if no word is tagged "nr" or "x".
    """
    return _first_entity_query(
        x,
        (u"nr", u"x"),
        u" ?a :actor_chName '{person}'. \n "
        u"            ?a :actor_bio ?x0")


# "Where is <person> from?" questions
def where_is_from_question(x):
    """Return a query for the birthplace of the first person in ``x``.

    :param x: list of matched word objects.
    :return: SPARQL query string, or ``None`` if no word is tagged "nr",
        "x" or "nrt".
    """
    return _first_entity_query(
        x,
        (u"nr", u"x", u"nrt"),
        u" ?a :actor_chName '{person}'.\n "
        u"            ?a :actor_birthPlace ?x0")


# "What is <movie> about?" questions
def movie_intro_question(x):
    """Return a query for the synopsis of the first movie in ``x``.

    :param x: list of matched word objects.
    :return: SPARQL query string, or ``None`` if no word is tagged "nz".
    """
    return _first_entity_query(
        x,
        (u"nz",),
        u" ?a :movie_chName '{person}'. \n "
        u"            ?a :movie_bio ?x0")


def customize_rules():
    """Build the question-matching rules.

    :return: list of ``Rule`` objects, each pairing a word pattern with the
        function that builds the SPARQL query for it.
    """
    # some rules for matching
    # TODO: customize your own rules here
    person = (W(pos="nr") | W(pos="x") | W(pos="nrt") | W(pos="nz"))
    movie = W(pos="nz")
    place = (W("出生地") | W("出生"))
    # "介绍" is a word to match, not a POS tag; it used to be passed as
    # ``pos`` and therefore could never match.
    intro = (W("简介") | W("介绍"))

    rules = [
        # "<r-tagged word> 是 <person>" or "<person> 是 <r-tagged word>"
        Rule(condition=(W(pos="r") + W("是") + person) |
                       (person + W("是") + W(pos="r")),
             action=who_is_question),

        # A person followed, anywhere later in the sentence, by a birthplace keyword.
        Rule(condition=person + Star(Any(), greedy=False) + place + Star(Any(), greedy=False),
             action=where_is_from_question),

        # A movie followed, anywhere later in the sentence, by an introduction keyword.
        Rule(condition=movie + Star(Any(), greedy=False) + intro + Star(Any(), greedy=False),
             action=movie_intro_question),
    ]
    return rules
#!/usr/bin/env python
# coding=utf-8

from SPARQLWrapper import SPARQLWrapper, JSON

from kgMovie.KBQA.patternREFO.utils.rules import customize_rules
from kgMovie.KBQA.patternREFO.utils.word_tagging import Tagger

if __name__ == "__main__":
    print("init...........")
    # Connect to the query endpoint of the TDB database.
    sparql_base = SPARQLWrapper("http://localhost:3030/kg_movie/query")
    # The result format never changes, so it is configured once up front
    # instead of once per matching rule.
    sparql_base.setReturnFormat(JSON)
    # Load the external dictionaries.
    tagger = Tagger(['data/actorName.txt', 'data/movieName.txt'])
    # Initialise and fetch the rule list.
    rules = customize_rules()
    print("done \n")

    while True:
        # Read a question.
        print("Please input your question: ")
        try:
            default_question = input()
        except EOFError:
            # stdin was closed: leave the loop instead of dying with a traceback.
            break
        # Turn the question into tagged word objects.
        seg_list = tagger.get_word_objects(default_question)
        matched = False
        for rule in rules:
            # Apply the rule to the question to obtain a query, if it matches.
            query = rule.apply(seg_list)
            if not query:
                continue
            matched = True
            sparql_base.setQuery(query)
            # Run the query and convert the response.
            results = sparql_base.query().convert()
            bindings = results["results"]["bindings"]

            if not bindings:
                print("No answer found :(")
                continue
            for result in bindings:
                print("Result: ", result["x0"]["value"])
        if not matched:
            # No template produced a query; say so instead of silently
            # prompting again.
            print("Sorry, I don't understand this question :(")

在运行query.py之前,应按照前面的教程将电影类知识图谱导入到Jena的TDB数据库中,并运行Fuseki服务器,这样我们才能进行访问查询。
具体可见:知识图谱构建5——Jena 和 Fuseki安装与SPARQL查询

python setup.py install

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/weixin_41104835/article/details/88945341
今日推荐