From 1e5701637390f43c0b52743add27bd0b00d6ea8e Mon Sep 17 00:00:00 2001 From: zhuzhongshu123 <152354526+zhuzhongshu123@users.noreply.github.com> Date: Mon, 20 Jan 2025 11:19:44 +0800 Subject: [PATCH] disable entity linking in postprocess by default (#304) --- kag/builder/component/postprocessor/kag_postprocessor.py | 7 ++++--- kag/examples/2wiki/kag_config.yaml | 1 - kag/examples/README.md | 1 - kag/examples/README_cn.md | 1 - kag/examples/baike/kag_config.yaml | 1 - kag/examples/csqa/kag_config.yaml | 1 - kag/examples/domain_kg/kag_config.yaml | 1 - kag/examples/example_config.yaml | 1 - kag/examples/hotpotqa/kag_config.yaml | 1 - kag/examples/medicine/kag_config.yaml | 1 - kag/examples/musique/kag_config.yaml | 1 - 11 files changed, 4 insertions(+), 13 deletions(-) diff --git a/kag/builder/component/postprocessor/kag_postprocessor.py b/kag/builder/component/postprocessor/kag_postprocessor.py index 8af36b06..5fbdbdd3 100644 --- a/kag/builder/component/postprocessor/kag_postprocessor.py +++ b/kag/builder/component/postprocessor/kag_postprocessor.py @@ -35,7 +35,7 @@ class KAGPostProcessor(PostProcessorABC): def __init__( self, - similarity_threshold: float = 0.9, + similarity_threshold: float = None, external_graph: ExternalGraphLoaderABC = None, ): """ @@ -180,8 +180,9 @@ class KAGPostProcessor(PostProcessorABC): origin_num_nodes = len(input.nodes) origin_num_edges = len(input.edges) new_graph = self.filter_invalid_data(input) - self.similarity_based_link(new_graph) - self.external_graph_based_link(new_graph) + if self.similarity_threshold is not None: + self.similarity_based_link(new_graph) + self.external_graph_based_link(new_graph) new_num_nodes = len(new_graph.nodes) new_num_edges = len(new_graph.edges) logger.debug( diff --git a/kag/examples/2wiki/kag_config.yaml b/kag/examples/2wiki/kag_config.yaml index ac2c8110..5f558aab 100644 --- a/kag/examples/2wiki/kag_config.yaml +++ b/kag/examples/2wiki/kag_config.yaml @@ -47,7 +47,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/README.md b/kag/examples/README.md index a6f1aeac..d2b70754 100644 --- a/kag/examples/README.md +++ b/kag/examples/README.md @@ -73,7 +73,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/README_cn.md b/kag/examples/README_cn.md index 2ad523a0..7f4f1a2d 100644 --- a/kag/examples/README_cn.md +++ b/kag/examples/README_cn.md @@ -73,7 +73,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/baike/kag_config.yaml b/kag/examples/baike/kag_config.yaml index e4ebca6d..33eb9f5a 100644 --- a/kag/examples/baike/kag_config.yaml +++ b/kag/examples/baike/kag_config.yaml @@ -49,7 +49,6 @@ kag_builder_pipeline: type: txt_reader # kag.builder.component.reader.txt_reader.TXTReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 300 diff --git a/kag/examples/csqa/kag_config.yaml b/kag/examples/csqa/kag_config.yaml index 4cc695ef..04a2a14b 100644 --- a/kag/examples/csqa/kag_config.yaml +++ b/kag/examples/csqa/kag_config.yaml @@ -47,7 +47,6 @@ kag_builder_pipeline: type: txt_reader # kag.builder.component.reader.txt_reader.TXTReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 4950 diff --git a/kag/examples/domain_kg/kag_config.yaml b/kag/examples/domain_kg/kag_config.yaml index 43a97e19..5bb5c1f9 100644 --- a/kag/examples/domain_kg/kag_config.yaml +++ b/kag/examples/domain_kg/kag_config.yaml @@ -68,7 +68,6 @@ kag_builder_pipeline: type: txt_reader # kag.builder.component.reader.text_reader.TXTReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 external_graph: *external_graph_loader splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter diff --git a/kag/examples/example_config.yaml b/kag/examples/example_config.yaml index e3d2bcef..b8cedc94 100644 --- a/kag/examples/example_config.yaml +++ b/kag/examples/example_config.yaml @@ -47,7 +47,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/hotpotqa/kag_config.yaml b/kag/examples/hotpotqa/kag_config.yaml index 3b1985b7..3217d3b9 100644 --- a/kag/examples/hotpotqa/kag_config.yaml +++ b/kag/examples/hotpotqa/kag_config.yaml @@ -47,7 +47,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/medicine/kag_config.yaml b/kag/examples/medicine/kag_config.yaml index e5e4035c..ff9c2478 100644 --- a/kag/examples/medicine/kag_config.yaml +++ b/kag/examples/medicine/kag_config.yaml @@ -51,7 +51,6 @@ extract_runner: name_col: title post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000 diff --git a/kag/examples/musique/kag_config.yaml b/kag/examples/musique/kag_config.yaml index 122021ec..8e29f5db 100644 --- a/kag/examples/musique/kag_config.yaml +++ b/kag/examples/musique/kag_config.yaml @@ -47,7 +47,6 @@ kag_builder_pipeline: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor - similarity_threshold: 0.9 splitter: type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter split_length: 100000