前言
前面的文章已经介绍了 Neo4j 服务的本地安装,以及数据的增删改查操作方法。本文开始进入 Python 项目,目标是完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文只介绍知识图谱的构建部分。

环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)

编码
1. 引入依赖
import json
import os

from py2neo import Graph, Node
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Remember where the medical data lives and connect to Neo4j.

    Called without arguments this behaves exactly as before: the data file
    is ``./data/medical.json`` and the connection goes to the local Bolt
    endpoint with the built-in account.

    Args:
        data_path: Path of the data file parsed by ``read_data``.
        uri: Bolt URI of the Neo4j server.
        auth: ``(user, password)`` pair handed to ``Graph``.
    """
    # NOTE(review): the default password stays in source only so existing
    # callers keep working; prefer passing ``auth`` from configuration.
    self.data_path = data_path
    self.neo4j = Graph(uri, auth=auth)


# 3. Read the data
def read_data(self):
    """Parse the medical data file into node names and relation pairs.

    ``self.data_path`` is read as UTF-8 text holding one JSON object per
    line; blank lines are skipped.  Only ``name`` is required in a record:
    every other text field falls back to ``''`` and every list field to an
    empty list, so sparse records no longer raise ``KeyError``.

    Returns:
        A 20-tuple, in this order:

        * sets of names: diseases, symptoms, producers, departments, drugs,
          foods, checks;
        * ``disease_info``: one attribute dict per record;
        * lists of ``[start_name, end_name]`` pairs: rels_symptom,
          rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
          rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
          rels_department, rels_category, rels_drug_producer.

        ``rels_drug_producer`` is returned twice (17th and 20th item)
        because the callers unpack twenty names; the shape is kept for them.

    Raises:
        KeyError: If a record has no ``name``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Node names; duplicates disappear when the sets are built on return.
    diseases = []
    symptoms = []      # symptoms and complications are collected together
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    disease_info = []  # one attribute dict per disease

    # Relations, each stored as a [start_name, end_name] pair.
    rels_symptom = []        # disease -> symptom
    rels_acompany = []       # disease -> complication
    rels_category = []       # disease -> department
    rels_department = []     # department -> department
    rels_commondrug = []     # disease -> commonly used drug
    rels_recommenddrug = []  # disease -> recommended drug
    rels_noteat = []         # disease -> food to avoid
    rels_doeat = []          # disease -> food that may be eaten
    rels_recommendeat = []   # disease -> recommended food
    rels_check = []          # disease -> check item
    rels_drug_producer = []  # producer -> drug

    # Text attributes copied onto the disease dict ('' when absent).
    text_fields = (
        'desc', 'prevent', 'cause', 'get_prob', 'yibao_status', 'easy_get',
        'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
    )
    # List attributes: (JSON key, node-name list, relation list).
    list_fields = (
        ('symptom', symptoms, rels_symptom),
        ('acompany', symptoms, rels_acompany),
        ('common_drug', drugs, rels_commondrug),
        ('recommand_drug', drugs, rels_recommenddrug),
        ('not_eat', foods, rels_noteat),
        ('do_eat', foods, rels_doeat),
        ('recommand_eat', foods, rels_recommendeat),
    )
    # This check item still becomes a node name but gets no relation.
    # NOTE(review): presumably skipped because its single quotes break a
    # string-built Cypher query -- confirm before removing the exclusion.
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            if not line.strip():
                continue
            data_json = json.loads(line)
            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {'name': disease, 'cure_department': []}
            for field in text_fields:
                disease_dict[field] = data_json.get(field, '')
            disease_info.append(disease_dict)

            for field, names, rels in list_fields:
                values = data_json.get(field, [])
                names.extend(values)
                rels.extend([disease, value] for value in values)

            if "cure_department" in data_json:
                cure_department = data_json['cure_department']
                departments.extend(cure_department)
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                elif len(cure_department) == 2:
                    # NOTE(review): the pair is stored as [large, small] and
                    # the disease is linked to the large department; verify
                    # this direction against the queries that read it.
                    large, small = cure_department
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            checkitem = data_json.get('check', [])
            checks.extend(checkitem)
            rels_check.extend(
                [disease, check_i] for check_i in checkitem
                if check_i != skipped_check
            )

            # Each entry is split at "(": the text before the first "("
            # goes to producers, the text after the last "(" (with ")"
            # removed) is the drug side of the pair.
            for name in data_json.get('drug_detail', []):
                producer = name.split("(")[0]
                drug = name.split("(")[-1].replace(")", "")
                producers.append(producer)
                rels_drug_producer.append([producer, drug])

    return (
        set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info, rels_symptom,
        rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
        rels_department, rels_category, rels_drug_producer,
    )


# 4. Create the nodes
def create_medical_nodes(self):
    """Create graph nodes from the parsed medical data.

    Only the disease nodes are created at present, through
    ``create_disease_node`` so that they carry their full attribute set.
    The calls for the other labels are left commented out, as in the
    original listing.

    Returns:
        None.
    """
    print("start create nodes")
    # Read through ``self`` rather than a module-level instance, so the
    # method works on whichever object it is called on.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info) = self.read_data()[:8]
    # Disease nodes (name only)
    # self.create_node('Diseases', diseases)
    # Symptom nodes
    # self.create_node('Symptoms', symptoms)
    # Department nodes
    # self.create_node('Departments', departments)
    # Drug nodes
    # self.create_node('Drugs', drugs)
    # Food nodes
    # self.create_node('Foods', foods)
    # Drug producer nodes
    # self.create_node('Producers', producers)
    # Check item nodes
    # self.create_node('Checks', checks)
    self.create_disease_node('Diseases', disease_info)
    return


def create_node(self, label, values):
    """Create one ``label`` node per name in ``values``.

    Args:
        label: Node label.
        values: Iterable of names; each becomes the node's ``name``.

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        self.neo4j.create(Node(label, name=val))
    return count


def create_disease_node(self, label, values):
    """Create one ``label`` node per disease dict, with all its attributes.

    Disease nodes are created separately from the other labels because they
    store more than a name.

    Args:
        label: Node label.
        values: Iterable of disease dicts; each must contain every key
            listed in ``attributes`` below.

    Returns:
        The number of nodes created (the original never incremented the
        counter and therefore always returned 0).
    """
    attributes = (
        'name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
        'easy_get', 'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
        'cure_department',
    )
    count = 0
    for disease in values:
        count += 1
        print("节点" + label + ", 名称:" + disease['name'])
        node = Node(label, **{key: disease[key] for key in attributes})
        self.neo4j.create(node)
    return count


# 5. Create the relationship edges
def create_medical_rels(self):
    """Create relationships from the parsed medical data.

    Only the disease -> check relation is created at present; the calls for
    the other relations are left commented out, as in the original listing.
    """
    print("start create rels")
    # Read through ``self`` rather than a module-level instance.  Items 9-19
    # of the result are the relation lists, in this order.
    (rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
     rels_noteat, rels_doeat, rels_recommendeat, rels_check,
     rels_drug_producer, rels_department, rels_category) = self.read_data()[8:19]
    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> commonly used drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that may be eaten
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug (label corrected to "Drugs": labels are case-sensitive
    # and the drug nodes are created with the label 'Drugs')
    # self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a ``rel_name`` relationship for every pair of node names.

    Node names and ``rel_attr`` are sent as query parameters instead of
    being formatted into the Cypher text, so values containing quotes
    (for example ``5'-NT``) can no longer break or alter the query.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of ``[start_name, end_name]`` pairs.  The parameter
            name shadows the builtin but is kept for existing callers.
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # Labels and relationship types cannot be Cypher parameters, so they are
    # still formatted in; they come from the calling code, not from the data.
    query = (
        "Match (start:%s), (end:%s) "
        "where start.name=$start_name and end.name=$end_name "
        "create (start)-[rel:%s{name:$rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)
    count = 0
    for item in list:
        count += 1
        s, e = item[0], item[1]
        print("创建边:" + rel_name + ",(" + start_node + "->" + end_node + "),点1:" + s + "点2:" + e)
        self.neo4j.run(query, {"start_name": s, "end_name": e, "rel_attr": rel_attr})
    return count


# 6. Export the node data
# Export the entity names as word lists for segmentation
def export_data(self):
    """Write each set of entity names to ``dict/<kind>.txt``, one per line.

    Files are written as UTF-8 without a trailing newline.  The ``dict``
    directory is created when missing, every file is closed after writing,
    and names are sorted so repeated runs produce identical files.  Disease
    names are not exported, as in the original listing.
    """
    # Read through ``self`` rather than a module-level instance; the first
    # seven items of the result are the name sets.
    (diseases, symptoms, producers, departments, drugs, foods,
     checks) = self.read_data()[:7]
    exports = (
        # ("diseases", diseases),  # disease names: export currently disabled
        ("symptoms", symptoms),
        ("producers", producers),
        ("departments", departments),
        ("drugs", drugs),
        ("foods", foods),
        ("checks", checks),
    )
    os.makedirs("dict", exist_ok=True)
    for kind, names in exports:
        with open("dict/%s.txt" % kind, encoding="utf-8", mode="w+") as out_file:
            out_file.write("\n".join(sorted(names)))
随时随地看视频