前言
前面的文章已经介绍了 Neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来将进入 Python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文只介绍知识图谱的构建部分。
环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)
编码
1. 引入依赖
import json
import os

from py2neo import Graph, Node
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Store the data file location and open the Neo4j connection.

    All parameters are optional; calling the constructor without arguments
    uses the same values that were previously hard-coded.

    Args:
        data_path: Path of the medical data file consumed by ``read_data``.
        uri: Address passed to ``py2neo.Graph``.
        auth: ``(user, password)`` tuple passed to ``py2neo.Graph``.
    """
    # NOTE(review): the default password is kept only for backward
    # compatibility; prefer passing ``auth`` from configuration instead of
    # committing real credentials to source control.
    self.data_path = data_path
    self.neo4j = Graph(uri, auth=auth)
3. 读取数据
def read_data(self):
    """Parse the data file at ``self.data_path`` into entities and relations.

    The file is read line by line and every non-blank line is decoded with
    ``json.loads`` as one disease record. ``name``, ``desc``, ``prevent``,
    ``cause`` and ``symptom`` are required keys (a missing one raises
    ``KeyError``); every other key is optional.

    Returns:
        A 20-item tuple, in this order:

        0-6   sets of entity names: diseases, symptoms (complications are
              included), producers, departments, drugs, foods, checks
        7     ``disease_info``: one attribute dict per disease
        8-15  relation lists of ``[disease, target]`` pairs: symptom,
              complication, common drug, recommended drug, not-eat, do-eat,
              recommended-eat, check
        16    ``[producer, drug]`` pairs
        17    ``[department, department]`` pairs
        18    ``[disease, department]`` pairs
        19    the same list object as item 16 (kept so that callers which
              unpack 20 values keep working)
    """
    # Entity name accumulators.
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    # One attribute dict per disease.
    disease_info = []
    # Relation accumulators.
    rels_symptom = []         # disease -> symptom
    rels_acompany = []        # disease -> complication
    rels_category = []        # disease -> department
    rels_department = []      # department -> department
    rels_commondrug = []      # disease -> commonly used drug
    rels_recommenddrug = []   # disease -> recommended drug
    rels_noteat = []          # disease -> food to avoid
    rels_doeat = []           # disease -> food that may be eaten
    rels_recommendeat = []    # disease -> recommended food
    rels_check = []           # disease -> check item
    rels_drug_producer = []   # producer -> drug

    # Scalar attributes that default to '' when the record omits them.
    optional_attrs = ('get_prob', 'yibao_status', 'easy_get', 'get_way',
                      'cure_lasttime', 'cured_prob', 'cost_money')
    # Optional list-valued keys: (record key, relation list, entity list).
    optional_lists = (
        ('acompany', rels_acompany, symptoms),
        ('common_drug', rels_commondrug, drugs),
        # NOTE(review): the original statement nesting for 'recommand_drug'
        # is not recoverable from the source; it is treated here as an
        # independent optional key, like its siblings.
        ('recommand_drug', rels_recommenddrug, drugs),
        ('not_eat', rels_noteat, foods),
        ('do_eat', rels_doeat, foods),
        ('recommand_eat', rels_recommendeat, foods),
    )
    # This check name contains single quotes; it is left out of the check
    # relations (but still listed among the check entities).
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    # The context manager guarantees the file handle is closed.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for data in data_file:
            if not data.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data_json = json.loads(data)
            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {
                'name': disease,
                'desc': data_json['desc'],
                'prevent': data_json['prevent'],
                'cause': data_json['cause'],
                'cure_department': [],
            }
            for attr in optional_attrs:
                disease_dict[attr] = data_json.get(attr, '')
            disease_info.append(disease_dict)

            symptom = data_json['symptom']
            rels_symptom.extend([disease, symptom_i] for symptom_i in symptom)
            symptoms += symptom

            # Departments: one entry links the disease to it directly; two
            # entries are read as [large, small].
            if "cure_department" in data_json:
                cure_department = data_json['cure_department']
                departments += cure_department
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large, small = cure_department
                    # NOTE(review): pair direction is large -> small and the
                    # disease is attached to the large department; confirm
                    # this matches the intended "belongs_to" semantics.
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            for key, rels, entities in optional_lists:
                values = data_json.get(key, [])
                rels.extend([disease, value] for value in values)
                entities.extend(values)

            checkitem = data_json.get('check', [])
            rels_check.extend([disease, check_i] for check_i in checkitem
                              if check_i != skipped_check)
            checks += checkitem

            # Each entry is split at "(": the text before the first "(" is
            # the producer, the text after the last "(" (minus ")") the drug.
            for name in data_json.get('drug_detail', []):
                producer = name.split("(")[0]
                drug = name.split("(")[-1].replace(")", "")
                producers.append(producer)
                rels_drug_producer.append([producer, drug])

    return set(diseases), set(symptoms), set(producers), set(departments), set(drugs), set(foods), set(
        checks), disease_info, rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat, \
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer, rels_department, rels_category, rels_drug_producer
4. 创建节点
def create_medical_nodes(self):
    """Create the graph nodes from the parsed medical data.

    Only the disease nodes (with their full attribute set) are created by
    default. The generic entity-node calls are listed below but disabled;
    enable the ones required for the current import run.
    """
    print("start create nodes")
    # Read through ``self`` rather than a module-level instance so the method
    # works on whichever object it is called on.
    diseases, symptoms, producers, departments, drugs, foods, checks, disease_info = \
        self.read_data()[:8]
    # Disabled entity-node imports (name-only nodes):
    # self.create_node('Diseases', diseases)
    # self.create_node('Symptoms', symptoms)
    # self.create_node('Departments', departments)
    # self.create_node('Drugs', drugs)
    # self.create_node('Foods', foods)
    # self.create_node('Producers', producers)
    # self.create_node('Checks', checks)
    # Disease nodes are created separately because they carry attributes.
    self.create_disease_node('Diseases', disease_info)


def create_node(self, label, values):
    """Create one node labelled ``label`` for every name in ``values``.

    Args:
        label: Node label.
        values: Iterable of node names (strings).

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        node = Node(label, name=val)
        self.neo4j.create(node)
    return count


def create_disease_node(self, label, values):
    """Create one attribute-carrying node per disease dict in ``values``.

    Args:
        label: Node label.
        values: Iterable of dicts holding the keys ``name``, ``desc``,
            ``prevent``, ``cause``, ``get_prob``, ``yibao_status``,
            ``easy_get``, ``get_way``, ``cure_lasttime``, ``cured_prob``,
            ``cost_money`` and ``cure_department``.

    Returns:
        The number of nodes created.
    """
    count = 0
    for disease in values:
        # Count every created node so the return value is meaningful.
        count += 1
        print("节点" + label + ", 名称:" + disease['name'])
        node = Node(
            label,
            name=disease['name'],
            desc=disease['desc'],
            prevent=disease['prevent'],
            cause=disease['cause'],
            get_prob=disease['get_prob'],
            yibao_status=disease['yibao_status'],
            easy_get=disease['easy_get'],
            get_way=disease['get_way'],
            cure_lasttime=disease['cure_lasttime'],
            cured_prob=disease['cured_prob'],
            cost_money=disease['cost_money'],
            cure_department=disease['cure_department'],
        )
        self.neo4j.create(node)
    return count
5. 创建关联边
def create_medical_rels(self):
    """Create the graph relationships from the parsed medical data.

    Only the disease -> check relationships are created by default. The
    other relationship imports are listed below but disabled; enable the
    ones required for the current import run.
    """
    print("start create rels")
    # Read through ``self`` rather than a module-level instance so the method
    # works on whichever object it is called on. Items 8..18 of the result
    # are the relation lists.
    (rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
     rels_noteat, rels_doeat, rels_recommendeat, rels_check,
     rels_drug_producer, rels_department, rels_category) = self.read_data()[8:19]
    # Disabled relationship imports:
    # disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # disease -> commonly used drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # disease -> food that may be eaten
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # producer -> drug
    # NOTE(review): this call was originally written with the end label
    # "drugs"; labels are case-sensitive, so "Drugs" is used here.
    # self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")
    # disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create one relationship per ``[start_name, end_name]`` pair.

    For every pair, the nodes are matched by label and ``name`` property and
    a ``rel_name`` relationship with property ``name = rel_attr`` is created
    from the start node to the end node.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of ``[start_name, end_name]`` pairs. (The parameter
            name shadows the builtin but is kept for keyword callers.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # Labels and relationship types cannot be Cypher parameters, so they are
    # formatted into the statement once; the names are sent as query
    # parameters so that quotes inside a name cannot break the statement.
    query = (
        "MATCH (start:%s), (end:%s) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        "CREATE (start)-[rel:%s {name: $rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)
    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print(f"创建边:{rel_name},({start_node}->{end_node}),点1:{s}点2:{e}")
        self.neo4j.run(query, {"start_name": s, "end_name": e, "rel_attr": rel_attr})
    return count
6. 导出节点数据
# Export the entity names as word-segmentation dictionaries.
def export_data(self):
    """Write the entity names to one text file per entity type under ``dict/``.

    Each file holds one name per line. The names come from sets, so they are
    sorted to make the output reproducible between runs. The ``dict``
    directory is created when it does not exist yet. Disease names are not
    exported (that export is disabled below).
    """
    # Read through ``self`` rather than a module-level instance so the method
    # works on whichever object it is called on. Items 1..6 of the result are
    # the non-disease entity sets.
    symptoms, producers, departments, drugs, foods, checks = self.read_data()[1:7]

    exports = (
        # Disabled: ("dict/diseases.txt", diseases),
        ("dict/symptoms.txt", symptoms),
        ("dict/producers.txt", producers),
        ("dict/departments.txt", departments),
        ("dict/drugs.txt", drugs),
        ("dict/foods.txt", foods),
        ("dict/checks.txt", checks),
    )
    os.makedirs("dict", exist_ok=True)
    for path, names in exports:
        # The context manager flushes and closes each file.
        with open(path, encoding="utf-8", mode="w") as out_file:
            out_file.write("\n".join(sorted(names)))