KAG/kag/builder/prompt/default/triple.py

211 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
import json
from typing import Optional, List
from kag.common.base.prompt_op import PromptOp
class OpenIETriplePrompt(PromptOp):
template_en = """
{
"instruction": "You are an expert specializing in carrying out open information extraction (OpenIE). Please extract any possible relations (including subject, predicate, object) from the given text, and list them following the json format {\"triples\": [[\"subject\", \"predicate\", \"object\"]]}\n. If there are none, do not list them.\n.\n\nPay attention to the following requirements:\n- Each triple should contain at least one, but preferably two, of the named entities in the entity_list.\n- Clearly resolve pronouns to their specific names to maintain clarity.",
"entity_list": $entity_list,
"input": "$input",
"example": {
"input": "The Rezort\nThe Rezort is a 2015 British zombie horror film directed by Steve Barker and written by Paul Gerstenberger.\n It stars Dougray Scott, Jessica De Gouw and Martin McCann.\n After humanity wins a devastating war against zombies, the few remaining undead are kept on a secure island, where they are hunted for sport.\n When something goes wrong with the island's security, the guests must face the possibility of a new outbreak.",
"entity_list": [
{
"entity": "The Rezort",
"category": "Works"
},
{
"entity": "2015",
"category": "Others"
},
{
"entity": "British",
"category": "GeographicLocation"
},
{
"entity": "Steve Barker",
"category": "Person"
},
{
"entity": "Paul Gerstenberger",
"category": "Person"
},
{
"entity": "Dougray Scott",
"category": "Person"
},
{
"entity": "Jessica De Gouw",
"category": "Person"
},
{
"entity": "Martin McCann",
"category": "Person"
},
{
"entity": "zombies",
"category": "Creature"
},
{
"entity": "zombie horror film",
"category": "Concept"
},
{
"entity": "humanity",
"category": "Concept"
},
{
"entity": "secure island",
"category": "GeographicLocation"
}
],
"output": [
[
"The Rezort",
"is",
"zombie horror film"
],
[
"The Rezort",
"publish at",
"2015"
],
[
"The Rezort",
"released",
"British"
],
[
"The Rezort",
"is directed by",
"Steve Barker"
],
[
"The Rezort",
"is written by",
"Paul Gerstenberger"
],
[
"The Rezort",
"stars",
"Dougray Scott"
],
[
"The Rezort",
"stars",
"Jessica De Gouw"
],
[
"The Rezort",
"stars",
"Martin McCann"
],
[
"humanity",
"wins",
"a devastating war against zombies"
],
[
"the few remaining undead",
"are kept on",
"a secure island"
],
[
"they",
"are hunted for",
"sport"
],
[
"something",
"goes wrong with",
"the island's security"
],
[
"the guests",
"must face",
"the possibility of a new outbreak"
]
]
}
}
"""
template_zh = """
{
"instruction": "您是一位专门从事开放信息提取OpenIE的专家。请从input字段的文本中提取任何可能的关系包括主语、谓语、宾语并按照JSON格式列出它们须遵循example字段的示例格式。请注意以下要求1. 每个三元组应至少包含entity_list实体列表中的一个但最好是两个命名实体。2. 明确地将代词解析为特定名称,以保持清晰度。",
"entity_list": $entity_list,
"input": "$input",
"example": {
"input": "烦躁不安、语妄、失眠酌用镇静药,禁用抑制呼吸的镇静药。\n3.并发症的处理经抗菌药物治疗后高热常在24小时内消退或数日内逐渐下降。\n若体温降而复升或3天后仍不降者应考虑SP的肺外感染如腋胸、心包炎或关节炎等。治疗接胸腔压力调节管吸引机负压吸引水瓶装置闭式负压吸引宜连续如经12小时后肺仍未复张应查找原因。",
"entity_list": [
{"entity": "烦躁不安", "category": "Symptom"},
{"entity": "语妄", "category": "Symptom"},
{"entity": "失眠", "category": "Symptom"},
{"entity": "镇静药", "category": "Medicine"},
{"entity": "肺外感染", "category": "Disease"},
{"entity": "胸腔压力调节管", "category": "MedicalEquipment"},
{"entity": "吸引机负压吸引水瓶装置", "category": "MedicalEquipment"},
{"entity": "闭式负压吸引", "category": "SurgicalOperation"}
],
"output":[
["烦躁不安", "酌用", "镇静药"],
["语妄", "酌用", "镇静药"],
["失眠", "酌用", "镇静药"],
["镇静药", "禁用", "抑制呼吸的镇静药"],
["高热", "消退", "24小时内"],
["高热", "下降", "数日内"],
["体温", "降而复升或3天后仍不降", "肺外感染"],
["肺外感染", "考虑", "腋胸、心包炎或关节炎"],
["胸腔压力调节管", "", "吸引机负压吸引水瓶装置"],
["闭式负压吸引", "宜连续", "如经12小时后肺仍未复张"]
]
}
}
"""
def __init__(self, language: Optional[str] = "en"):
super().__init__(language)
@property
def template_variables(self) -> List[str]:
return ["entity_list", "input"]
def parse_response(self, response: str, **kwargs):
rsp = response
if isinstance(rsp, str):
rsp = json.loads(rsp)
if isinstance(rsp, dict) and "output" in rsp:
rsp = rsp["output"]
if isinstance(rsp, dict) and "triples" in rsp:
triples = rsp["triples"]
else:
triples = rsp
standardized_triples = []
for triple in triples:
if isinstance(triple, list):
standardized_triples.append(triple)
elif isinstance(triple, dict):
s = triple.get("subject")
p = triple.get("predicate")
o = triple.get("object")
if s and p and o:
standardized_triples.append([s, p, o])
return standardized_triples