mirror of https://github.com/OpenSPG/KAG
211 lines
7.9 KiB
Python
211 lines
7.9 KiB
Python
# -*- coding: utf-8 -*-
|
||
# Copyright 2023 OpenSPG Authors
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||
# in compliance with the License. You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||
# or implied.
|
||
|
||
import json
|
||
from typing import Optional, List
|
||
|
||
from kag.common.base.prompt_op import PromptOp
|
||
|
||
|
||
class OpenIETriplePrompt(PromptOp):
|
||
template_en = """
|
||
{
|
||
"instruction": "You are an expert specializing in carrying out open information extraction (OpenIE). Please extract any possible relations (including subject, predicate, object) from the given text, and list them following the json format {\"triples\": [[\"subject\", \"predicate\", \"object\"]]}\n. If there are none, do not list them.\n.\n\nPay attention to the following requirements:\n- Each triple should contain at least one, but preferably two, of the named entities in the entity_list.\n- Clearly resolve pronouns to their specific names to maintain clarity.",
|
||
"entity_list": $entity_list,
|
||
"input": "$input",
|
||
"example": {
|
||
"input": "The Rezort\nThe Rezort is a 2015 British zombie horror film directed by Steve Barker and written by Paul Gerstenberger.\n It stars Dougray Scott, Jessica De Gouw and Martin McCann.\n After humanity wins a devastating war against zombies, the few remaining undead are kept on a secure island, where they are hunted for sport.\n When something goes wrong with the island's security, the guests must face the possibility of a new outbreak.",
|
||
"entity_list": [
|
||
{
|
||
"entity": "The Rezort",
|
||
"category": "Works"
|
||
},
|
||
{
|
||
"entity": "2015",
|
||
"category": "Others"
|
||
},
|
||
{
|
||
"entity": "British",
|
||
"category": "GeographicLocation"
|
||
},
|
||
{
|
||
"entity": "Steve Barker",
|
||
"category": "Person"
|
||
},
|
||
{
|
||
"entity": "Paul Gerstenberger",
|
||
"category": "Person"
|
||
},
|
||
{
|
||
"entity": "Dougray Scott",
|
||
"category": "Person"
|
||
},
|
||
{
|
||
"entity": "Jessica De Gouw",
|
||
"category": "Person"
|
||
},
|
||
{
|
||
"entity": "Martin McCann",
|
||
"category": "Person"
|
||
},
|
||
{
|
||
"entity": "zombies",
|
||
"category": "Creature"
|
||
},
|
||
{
|
||
"entity": "zombie horror film",
|
||
"category": "Concept"
|
||
},
|
||
{
|
||
"entity": "humanity",
|
||
"category": "Concept"
|
||
},
|
||
{
|
||
"entity": "secure island",
|
||
"category": "GeographicLocation"
|
||
}
|
||
],
|
||
"output": [
|
||
[
|
||
"The Rezort",
|
||
"is",
|
||
"zombie horror film"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"publish at",
|
||
"2015"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"released",
|
||
"British"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"is directed by",
|
||
"Steve Barker"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"is written by",
|
||
"Paul Gerstenberger"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"stars",
|
||
"Dougray Scott"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"stars",
|
||
"Jessica De Gouw"
|
||
],
|
||
[
|
||
"The Rezort",
|
||
"stars",
|
||
"Martin McCann"
|
||
],
|
||
[
|
||
"humanity",
|
||
"wins",
|
||
"a devastating war against zombies"
|
||
],
|
||
[
|
||
"the few remaining undead",
|
||
"are kept on",
|
||
"a secure island"
|
||
],
|
||
[
|
||
"they",
|
||
"are hunted for",
|
||
"sport"
|
||
],
|
||
[
|
||
"something",
|
||
"goes wrong with",
|
||
"the island's security"
|
||
],
|
||
[
|
||
"the guests",
|
||
"must face",
|
||
"the possibility of a new outbreak"
|
||
]
|
||
]
|
||
}
|
||
}
|
||
"""
|
||
|
||
template_zh = """
|
||
{
|
||
"instruction": "您是一位专门从事开放信息提取(OpenIE)的专家。请从input字段的文本中提取任何可能的关系(包括主语、谓语、宾语),并按照JSON格式列出它们,须遵循example字段的示例格式。请注意以下要求:1. 每个三元组应至少包含entity_list实体列表中的一个,但最好是两个命名实体。2. 明确地将代词解析为特定名称,以保持清晰度。",
|
||
"entity_list": $entity_list,
|
||
"input": "$input",
|
||
"example": {
|
||
"input": "烦躁不安、语妄、失眠酌用镇静药,禁用抑制呼吸的镇静药。\n3.并发症的处理经抗菌药物治疗后,高热常在24小时内消退,或数日内逐渐下降。\n若体温降而复升或3天后仍不降者,应考虑SP的肺外感染,如腋胸、心包炎或关节炎等。治疗:接胸腔压力调节管+吸引机负压吸引水瓶装置闭式负压吸引宜连续,如经12小时后肺仍未复张,应查找原因。",
|
||
"entity_list": [
|
||
{"entity": "烦躁不安", "category": "Symptom"},
|
||
{"entity": "语妄", "category": "Symptom"},
|
||
{"entity": "失眠", "category": "Symptom"},
|
||
{"entity": "镇静药", "category": "Medicine"},
|
||
{"entity": "肺外感染", "category": "Disease"},
|
||
{"entity": "胸腔压力调节管", "category": "MedicalEquipment"},
|
||
{"entity": "吸引机负压吸引水瓶装置", "category": "MedicalEquipment"},
|
||
{"entity": "闭式负压吸引", "category": "SurgicalOperation"}
|
||
],
|
||
"output":[
|
||
["烦躁不安", "酌用", "镇静药"],
|
||
["语妄", "酌用", "镇静药"],
|
||
["失眠", "酌用", "镇静药"],
|
||
["镇静药", "禁用", "抑制呼吸的镇静药"],
|
||
["高热", "消退", "24小时内"],
|
||
["高热", "下降", "数日内"],
|
||
["体温", "降而复升或3天后仍不降", "肺外感染"],
|
||
["肺外感染", "考虑", "腋胸、心包炎或关节炎"],
|
||
["胸腔压力调节管", "接", "吸引机负压吸引水瓶装置"],
|
||
["闭式负压吸引", "宜连续", "如经12小时后肺仍未复张"]
|
||
]
|
||
}
|
||
}
|
||
"""
|
||
|
||
def __init__(self, language: Optional[str] = "en"):
    """Initialise the prompt for a given language.

    Args:
        language: Language code passed straight through to the parent
            initializer; defaults to ``"en"``.
            NOTE(review): presumably selects between ``template_en`` and
            ``template_zh`` -- confirm against the base class.
    """
    super().__init__(language)
|
||
|
||
@property
def template_variables(self) -> List[str]:
    """Return the names of the ``$``-placeholders used by the prompt templates.

    Both templates reference ``$entity_list`` and ``$input``; a fresh list
    is built on every access.
    """
    placeholder_names = ["entity_list", "input"]
    return placeholder_names
|
||
|
||
def parse_response(self, response: str, **kwargs):
    """Turn a raw model reply into a list of ``[subject, predicate, object]`` triples.

    Args:
        response: Either a JSON string or an already-decoded JSON value.
            A top-level ``"output"`` wrapper and a ``"triples"`` wrapper are
            both unwrapped when present.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A list of triples. Items that are lists are passed through
        unchanged; items that are dicts are converted to
        ``[subject, predicate, object]`` and kept only when all three
        fields are truthy. Any other item is skipped. An empty list is
        returned when the decoded payload cannot be iterated at all.

    Raises:
        json.JSONDecodeError: If ``response`` is a string that is not
            valid JSON.
    """
    payload = json.loads(response) if isinstance(response, str) else response

    # Peel off the optional wrappers: {"output": ...} and then {"triples": [...]}.
    if isinstance(payload, dict) and "output" in payload:
        payload = payload["output"]
    if isinstance(payload, dict) and "triples" in payload:
        payload = payload["triples"]

    # A reply such as ``null`` or a bare number carries no triples; looping
    # over it directly would raise TypeError, so report "nothing extracted".
    try:
        candidates = iter(payload)
    except TypeError:
        return []

    standardized_triples = []
    for candidate in candidates:
        if isinstance(candidate, list):
            standardized_triples.append(candidate)
        elif isinstance(candidate, dict):
            parts = [
                candidate.get(field)
                for field in ("subject", "predicate", "object")
            ]
            # Drop dict-style triples with a missing or empty component.
            if all(parts):
                standardized_triples.append(parts)

    return standardized_triples
|