From 674fc02a5390876c59a504a5fb7ceb3dc994af90 Mon Sep 17 00:00:00 2001 From: Novice Date: Wed, 16 Apr 2025 18:12:40 +0800 Subject: [PATCH 1/4] chore: uv add json_repair --- api/pyproject.toml | 1 + api/uv.lock | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 9e838e1fde..85306d11fa 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -82,6 +82,7 @@ dependencies = [ "unstructured[docx,epub,md,ppt,pptx]~=0.16.1", "validators==0.21.0", "yarl~=1.18.3", + "json-repair>=0.41.1", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. diff --git a/api/uv.lock b/api/uv.lock index 4ff9c34446..4384e1abb5 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.11, <3.13" resolution-markers = [ "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy'", @@ -1178,6 +1177,7 @@ dependencies = [ { name = "gunicorn" }, { name = "httpx", extra = ["socks"] }, { name = "jieba" }, + { name = "json-repair" }, { name = "langfuse" }, { name = "langsmith" }, { name = "mailchimp-transactional" }, @@ -1346,6 +1346,7 @@ requires-dist = [ { name = "gunicorn", specifier = "~=23.0.0" }, { name = "httpx", extras = ["socks"], specifier = "~=0.27.0" }, { name = "jieba", specifier = "==0.42.1" }, + { name = "json-repair", specifier = ">=0.41.1" }, { name = "langfuse", specifier = "~=2.51.3" }, { name = "langsmith", specifier = "~=0.1.77" }, { name = "mailchimp-transactional", specifier = "~=1.0.50" }, @@ -2524,6 +2525,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, ] +[[package]] +name = "json-repair" +version = "0.41.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/6a/6c7a75a10da6dc807b582f2449034da1ed74415e8899746bdfff97109012/json_repair-0.41.1.tar.gz", hash = "sha256:bba404b0888c84a6b86ecc02ec43b71b673cfee463baf6da94e079c55b136565", size = 31208 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/5c/abd7495c934d9af5c263c2245ae30cfaa716c3c0cf027b2b8fa686ee7bd4/json_repair-0.41.1-py3-none-any.whl", hash = "sha256:0e181fd43a696887881fe19fed23422a54b3e4c558b6ff27a86a8c3ddde9ae79", size = 21578 }, +] + [[package]] name = "jsonpath-python" version = "1.0.6" @@ -4074,6 +4084,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/cd/ed6e429fb0792ce368f66e83246264dd3a7a045b0b1e63043ed22a063ce5/pycryptodome-3.19.1-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7c9e222d0976f68d0cf6409cfea896676ddc1d98485d601e9508f90f60e2b0a2", size = 2144914 }, { url = "https://files.pythonhosted.org/packages/f6/23/b064bd4cfbf2cc5f25afcde0e7c880df5b20798172793137ba4b62d82e72/pycryptodome-3.19.1-cp35-abi3-win32.whl", hash = "sha256:4805e053571140cb37cf153b5c72cd324bb1e3e837cbe590a19f69b6cf85fd03", size = 1713105 }, { url = "https://files.pythonhosted.org/packages/7d/e0/ded1968a5257ab34216a0f8db7433897a2337d59e6d03be113713b346ea2/pycryptodome-3.19.1-cp35-abi3-win_amd64.whl", hash = "sha256:a470237ee71a1efd63f9becebc0ad84b88ec28e6784a2047684b693f458f41b7", size = 1749222 }, + { url = "https://files.pythonhosted.org/packages/1d/e3/0c9679cd66cf5604b1f070bdf4525a0c01a15187be287d8348b2eafb718e/pycryptodome-3.19.1-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:ed932eb6c2b1c4391e166e1a562c9d2f020bfff44a0e1b108f67af38b390ea89", size = 1629005 }, + { url = "https://files.pythonhosted.org/packages/13/75/0d63bf0daafd0580b17202d8a9dd57f28c8487f26146b3e2799b0c5a059c/pycryptodome-3.19.1-pp27-pypy_73-win32.whl", hash = "sha256:81e9d23c0316fc1b45d984a44881b220062336bbdc340aa9218e8d0656587934", size = 1697997 }, ] [[package]] From 56ef622447e1858f3d28265d254b474b2f4a5e03 Mon Sep 17 00:00:00 2001 From: Novice Date: Wed, 16 Apr 2025 18:16:22 +0800 Subject: [PATCH 2/4] chore: sort dependencies --- api/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 85306d11fa..08f9c1e229 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "gunicorn~=23.0.0", "httpx[socks]~=0.27.0", "jieba==0.42.1", + "json-repair>=0.41.1", "langfuse~=2.51.3", "langsmith~=0.1.77", "mailchimp-transactional~=1.0.50", @@ -82,7 +83,6 @@ dependencies = [ "unstructured[docx,epub,md,ppt,pptx]~=0.16.1", "validators==0.21.0", "yarl~=1.18.3", - "json-repair>=0.41.1", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. From e6a7a1884c8a359d8081850fdbac6519ff926d97 Mon Sep 17 00:00:00 2001 From: Novice Date: Thu, 17 Apr 2025 19:34:20 +0800 Subject: [PATCH 3/4] chore: fix the code review issues --- api/.env.example | 1 - api/controllers/console/app/generator.py | 2 -- api/core/llm_generator/llm_generator.py | 18 ++++------ api/core/llm_generator/prompts.py | 4 +-- .../model_runtime/entities/model_entities.py | 13 +++---- api/core/workflow/nodes/llm/node.py | 36 ++++++++++++------- docker/.env.example | 5 --- docker/docker-compose.yaml | 1 - 8 files changed, 37 insertions(+), 43 deletions(-) diff --git a/api/.env.example b/api/.env.example index a745746efa..af95a4fe2d 100644 --- a/api/.env.example +++ b/api/.env.example @@ -326,7 +326,6 @@ UPLOAD_AUDIO_FILE_SIZE_LIMIT=50 MULTIMODAL_SEND_FORMAT=base64 PROMPT_GENERATION_MAX_TOKENS=512 CODE_GENERATION_MAX_TOKENS=1024 -STRUCTURED_OUTPUT_MAX_TOKENS=1024 PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false # Mail configuration, support: resend, smtp diff --git a/api/controllers/console/app/generator.py b/api/controllers/console/app/generator.py index 0ab2aaafbb..4046417076 100644 --- a/api/controllers/console/app/generator.py +++ b/api/controllers/console/app/generator.py @@ -96,13 +96,11 @@ class RuleStructuredOutputGenerateApi(Resource): args = parser.parse_args() account = current_user - structured_output_max_tokens = int(os.getenv("STRUCTURED_OUTPUT_MAX_TOKENS", "1024")) try: structured_output = LLMGenerator.generate_structured_output( tenant_id=account.current_tenant_id, instruction=args["instruction"], model_config=args["model_config"], - max_tokens=structured_output_max_tokens, ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 9cdccd8e7f..d5d2ca60fa 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -10,7 +10,7 @@ from core.llm_generator.prompts import ( GENERATOR_QA_PROMPT, JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE, PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE, - STRUCTURED_OUTPUT_GENERATE_TEMPLATE, + SYSTEM_STRUCTURED_OUTPUT_GENERATE, WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE, ) from core.model_manager import ModelManager @@ -343,16 +343,7 @@ class LLMGenerator: return answer.strip() @classmethod - def generate_structured_output(cls, tenant_id: str, instruction: str, model_config: dict, max_tokens: int): - prompt_template = PromptTemplateParser(STRUCTURED_OUTPUT_GENERATE_TEMPLATE) - - prompt = prompt_template.format( - inputs={ - "INSTRUCTION": instruction, - }, - remove_template_variables=False, - ) - + def generate_structured_output(cls, tenant_id: str, instruction: str, model_config: dict): model_manager = ModelManager() model_instance = model_manager.get_model_instance( tenant_id=tenant_id, @@ -361,7 +352,10 @@ class LLMGenerator: model=model_config.get("name", ""), ) - prompt_messages = [UserPromptMessage(content=prompt)] + prompt_messages = [ + SystemPromptMessage(content=SYSTEM_STRUCTURED_OUTPUT_GENERATE), + UserPromptMessage(content=instruction), + ] model_parameters = model_config.get("model_parameters", {}) try: diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index 1ac03b9471..5882d2d76f 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -325,7 +325,5 @@ Your task is to convert simple user descriptions into properly formatted JSON Sc ] } -Now, generate a JSON Schema based on my description: -**User Input:** {{INSTRUCTION}} -**JSON Schema Output:** +Now, generate a JSON Schema based on my description """ # noqa: E501 diff --git a/api/core/model_runtime/entities/model_entities.py b/api/core/model_runtime/entities/model_entities.py index 3009178c2b..373ef2bbe2 100644 --- a/api/core/model_runtime/entities/model_entities.py +++ b/api/core/model_runtime/entities/model_entities.py @@ -202,12 +202,13 @@ class AIModelEntity(ProviderModel): def validate_model(self): supported_schema_keys = ["json_schema"] schema_key = next((rule.name for rule in self.parameter_rules if rule.name in supported_schema_keys), None) - if schema_key: - if self.features is None: - self.features = [ModelFeature.STRUCTURED_OUTPUT] - else: - if ModelFeature.STRUCTURED_OUTPUT not in self.features: - self.features = [*self.features, ModelFeature.STRUCTURED_OUTPUT] + if not schema_key: + return self + if self.features is None: + self.features = [ModelFeature.STRUCTURED_OUTPUT] + else: + if ModelFeature.STRUCTURED_OUTPUT not in self.features: + self.features.append(ModelFeature.STRUCTURED_OUTPUT) return self diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 97f81ea7fd..dbd92cb426 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -102,6 +102,12 @@ class LLMNode(BaseNode[LLMNodeData]): _node_type = NodeType.LLM def _run(self) -> Generator[NodeEvent | InNodeEvent, None, None]: + def process_structured_output(text: str) -> Optional[dict[str, Any] | list[Any]]: + """Process structured output if enabled""" + if not self.node_data.structured_output_enabled or not self.node_data.structured_output: + return None + return self._parse_structured_output(text) + node_inputs: Optional[dict[str, Any]] = None process_data = None result_text = "" @@ -201,19 +207,8 @@ class LLMNode(BaseNode[LLMNodeData]): self.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage) break outputs = {"text": result_text, "usage": jsonable_encoder(usage), "finish_reason": finish_reason} - if self.node_data.structured_output_enabled and self.node_data.structured_output: - structured_output: dict[str, Any] | list[Any] = {} - try: - parsed = json.loads(result_text) - if not isinstance(parsed, (dict | list)): - raise LLMNodeError(f"Failed to parse structured output: {result_text}") - structured_output = parsed - except json.JSONDecodeError as e: - # if the result_text is not a valid json, try to repair it - parsed = json_repair.loads(result_text) - if not isinstance(parsed, (dict | list)): - raise LLMNodeError(f"Failed to parse structured output: {result_text}") - structured_output = parsed + structured_output = process_structured_output(result_text) + if structured_output: outputs["structured_output"] = structured_output yield RunCompletedEvent( run_result=NodeRunResult( @@ -759,6 +754,21 @@ class LLMNode(BaseNode[LLMNodeData]): stop = model_config.stop return filtered_prompt_messages, stop + def _parse_structured_output(self, result_text: str) -> dict[str, Any] | list[Any]: + structured_output: dict[str, Any] | list[Any] = {} + try: + parsed = json.loads(result_text) + if not isinstance(parsed, (dict | list)): + raise LLMNodeError(f"Failed to parse structured output: {result_text}") + structured_output = parsed + except json.JSONDecodeError as e: + # if the result_text is not a valid json, try to repair it + parsed = json_repair.loads(result_text) + if not isinstance(parsed, (dict | list)): + raise LLMNodeError(f"Failed to parse structured output: {result_text}") + structured_output = parsed + return structured_output + @classmethod def deduct_llm_quota(cls, tenant_id: str, model_instance: ModelInstance, usage: LLMUsage) -> None: provider_model_bundle = model_instance.provider_model_bundle diff --git a/docker/.env.example b/docker/.env.example index 00b2aef052..e49e8fee89 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -622,11 +622,6 @@ PROMPT_GENERATION_MAX_TOKENS=512 # Default: 1024 tokens. CODE_GENERATION_MAX_TOKENS=1024 -# The maximum number of tokens allowed for structured output. -# This setting controls the upper limit of tokens that can be used by the LLM -# when generating structured output in the structured output tool. -# Default: 1024 tokens. -STRUCTURED_OUTPUT_MAX_TOKENS=1024 # Enable or disable plugin based token counting. If disabled, token counting will return 0. # This can improve performance by skipping token counting operations. # Default: false (disabled). diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 041480f9ec..25b0c56561 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -279,7 +279,6 @@ x-shared-env: &shared-api-worker-env SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true} PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512} CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024} - STRUCTURED_OUTPUT_MAX_TOKENS: ${STRUCTURED_OUTPUT_MAX_TOKENS:-1024} PLUGIN_BASED_TOKEN_COUNTING_ENABLED: ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false} MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64} UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10} From 9c56db736ca7c5239095e4d818a346fde0dd4499 Mon Sep 17 00:00:00 2001 From: Novice Date: Thu, 17 Apr 2025 19:41:27 +0800 Subject: [PATCH 4/4] chore: fix the prompt name change --- api/core/llm_generator/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index 5882d2d76f..82d22d7f89 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -221,7 +221,7 @@ Here is the task description: {{INPUT_TEXT}} You just need to generate the output """ # noqa: E501 -STRUCTURED_OUTPUT_GENERATE_TEMPLATE = """ +SYSTEM_STRUCTURED_OUTPUT_GENERATE = """ Your task is to convert simple user descriptions into properly formatted JSON Schema definitions. When a user describes data fields they need, generate a complete, valid JSON Schema that accurately represents those fields with appropriate types and requirements. ## Instructions: