KAG/kag/builder/prompt/default/std.py

144 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
import json
from typing import Optional, List
from kag.common.base.prompt_op import PromptOp
class OpenIEEntitystandardizationdPrompt(PromptOp):
    """Prompt operator that asks an LLM for official names of named entities.

    The class provides an English (``template_en``) and a Chinese
    (``template_zh``) prompt template. Both templates reference two
    placeholders, ``$input`` and ``$named_entities``, which are the names
    reported by :attr:`template_variables`. :meth:`parse_response` turns the
    model's answer into a list of entity dicts and adds a fallback entry for
    every input entity the model left out.
    """

    # NOTE(review): in the example below the last input entity is
    # "2006 midterm elections" while the matching output entry uses
    # "midterm elections". parse_response matches on the exact "entity"
    # string, so an answer shaped like this example would leave the original
    # entity unmatched -- confirm whether the example should echo the input.
    template_en = """
{
"instruction": "The `input` field contains a user provided context. The `named_entities` field contains extracted named entities from the context, which may be unclear abbreviations, aliases, or slang. To eliminate ambiguity, please attempt to provide the official names of these entities based on the context and your own knowledge. Note that entities with the same meaning can only have ONE official name. Please respond in the format of a single JSONArray string without any explanation, as shown in the `output` field of the provided example.",
"example": {
"input": "American History\nWhen did the political party that favored harsh punishment of southern states after the Civil War, gain control of the House? Republicans regained control of the chamber they had lost in the 2006 midterm elections.",
"named_entities": [
{"entity": "American", "category": "GeographicLocation"},
{"entity": "political party", "category": "Organization"},
{"entity": "southern states", "category": "GeographicLocation"},
{"entity": "Civil War", "category": "Keyword"},
{"entity": "House", "category": "Organization"},
{"entity": "Republicans", "category": "Organization"},
{"entity": "chamber", "category": "Organization"},
{"entity": "2006 midterm elections", "category": "Date"}
],
"output": [
{
"entity": "American",
"category": "GeographicLocation",
"official_name": "United States of America"
},
{
"entity": "political party",
"category": "Organization",
"official_name": "Radical Republicans"
},
{
"entity": "southern states",
"category": "GeographicLocation",
"official_name": "Confederacy"
},
{
"entity": "Civil War",
"category": "Keyword",
"official_name": "American Civil War"
},
{
"entity": "House",
"category": "Organization",
"official_name": "United States House of Representatives"
},
{
"entity": "Republicans",
"category": "Organization",
"official_name": "Republican Party"
},
{
"entity": "chamber",
"category": "Organization",
"official_name": "United States House of Representatives"
},
{
"entity": "midterm elections",
"category": "Date",
"official_name": "United States midterm elections"
}
]
},
"input": "$input",
"named_entities": $named_entities
}
"""

    # NOTE(review): unlike template_en, this template leaves `$input`
    # unquoted and ends the object with a trailing comma. Left as-is because
    # changing the prompt text changes what the model receives -- confirm
    # whether the two templates are meant to agree.
    template_zh = """
{
"instruction": "input字段包含用户提供的上下文。命名实体字段包含从上下文中提取的命名实体这些可能是含义不明的缩写、别名或俚语。为了消除歧义请尝试根据上下文和您自己的知识提供这些实体的官方名称。请注意具有相同含义的实体只能有一个官方名称。请按照提供的示例中的输出字段格式以单个JSONArray字符串形式回复无需任何解释。",
"example": {
"input": "烦躁不安、语妄、失眠酌用镇静药,禁用抑制呼吸的镇静药。\n3.并发症的处理经抗菌药物治疗后高热常在24小时内消退或数日内逐渐下降。\n若体温降而复升或3天后仍不降者应考虑SP的肺外感染如腋胸、心包炎或关节炎等。治疗接胸腔压力调节管吸引机负压吸引水瓶装置闭式负压吸引宜连续如经12小时后肺仍未复张应查找原因。",
"named_entities": [
{"entity": "烦躁不安", "category": "Symptom"},
{"entity": "语妄", "category": "Symptom"},
{"entity": "失眠", "category": "Symptom"},
{"entity": "镇静药", "category": "Medicine"},
{"entity": "肺外感染", "category": "Disease"},
{"entity": "胸腔压力调节管", "category": "MedicalEquipment"},
{"entity": "吸引机负压吸引水瓶装置", "category": "MedicalEquipment"},
{"entity": "闭式负压吸引", "category": "SurgicalOperation"}
],
"output": [
{"entity": "烦躁不安", "category": "Symptom", "official_name": "焦虑不安"},
{"entity": "语妄", "category": "Symptom", "official_name": "谵妄"},
{"entity": "失眠", "category": "Symptom", "official_name": "失眠症"},
{"entity": "镇静药", "category": "Medicine", "official_name": "镇静剂"},
{"entity": "肺外感染", "category": "Disease", "official_name": "肺外感染"},
{"entity": "胸腔压力调节管", "category": "MedicalEquipment", "official_name": "胸腔引流管"},
{"entity": "吸引机负压吸引水瓶装置", "category": "MedicalEquipment", "official_name": "负压吸引装置"},
{"entity": "闭式负压吸引", "category": "SurgicalOperation", "official_name": "闭式负压引流"}
]
},
"input": $input,
"named_entities": $named_entities,
}
"""

    def __init__(self, language: Optional[str] = "en"):
        """Initialise the prompt; ``language`` is forwarded to ``PromptOp``."""
        super().__init__(language)

    @property
    def template_variables(self) -> List[str]:
        """Return the placeholder names used by both templates."""
        return ["input", "named_entities"]

    def parse_response(self, response: str, **kwargs) -> List[dict]:
        """Merge the model's standardized entities with the original entities.

        Args:
            response: The model answer. A ``str`` is decoded with
                ``json.loads``; any other value is used as already-parsed
                data. A dict is unwrapped through its ``"output"`` key and
                then its ``"named_entities"`` key when those are present.
            **kwargs: ``named_entities`` may carry the entity dicts that were
                sent to the model; each must have an ``"entity"`` key. A
                missing or ``None`` value is treated as an empty list.

        Returns:
            The entities from the response, in order, followed by one entry
            for each input entity whose ``"entity"`` value does not appear in
            the response. Those fallback entries are copies of the input
            dicts with ``"official_name"`` set to the entity's own name.

        Raises:
            json.JSONDecodeError: If ``response`` is a string that is not
                valid JSON.
        """
        rsp = response
        if isinstance(rsp, str):
            rsp = json.loads(rsp)
        if isinstance(rsp, dict) and "output" in rsp:
            rsp = rsp["output"]
        if isinstance(rsp, dict) and "named_entities" in rsp:
            standardized_entities = rsp["named_entities"]
        else:
            standardized_entities = rsp

        merged = list(standardized_entities)
        entities_with_official_name = {entity["entity"] for entity in merged}

        # In case the LLM ignores some entities, fall back to the entity's own
        # name. A shallow copy is appended so the caller's dicts are not
        # modified as a side effect of parsing.
        for entity in kwargs.get("named_entities") or []:
            if entity["entity"] not in entities_with_official_name:
                merged.append({**entity, "official_name": entity["entity"]})
        return merged