mirror of https://github.com/langgenius/dify.git
feat: add an abstract layer for WorkflowNodeExecution (#18026)
This commit is contained in:
parent 77fde04ef7
commit 6d9dd3109e

@@ -424,6 +424,12 @@ WORKFLOW_CALL_MAX_DEPTH=5
WORKFLOW_PARALLEL_DEPTH_LIMIT=3
MAX_VARIABLE_SIZE=204800

# Workflow storage configuration
# Options: rdbms, hybrid
# rdbms: Use only the relational database (default)
# hybrid: Save new data to object storage, read from both object storage and RDBMS
WORKFLOW_NODE_EXECUTION_STORAGE=rdbms

# App configuration
APP_MAX_EXECUTION_TIME=1200
APP_MAX_ACTIVE_REQUESTS=0

@@ -54,6 +54,7 @@ def initialize_extensions(app: DifyApp):
        ext_otel,
        ext_proxy_fix,
        ext_redis,
        ext_repositories,
        ext_sentry,
        ext_set_secretkey,
        ext_storage,

@@ -74,6 +75,7 @@ def initialize_extensions(app: DifyApp):
        ext_migrate,
        ext_redis,
        ext_storage,
        ext_repositories,
        ext_celery,
        ext_login,
        ext_mail,

@@ -12,7 +12,7 @@ from pydantic import (
)
from pydantic_settings import BaseSettings

from configs.feature.hosted_service import HostedServiceConfig
from .hosted_service import HostedServiceConfig


class SecurityConfig(BaseSettings):

@@ -519,6 +519,11 @@ class WorkflowNodeExecutionConfig(BaseSettings):
        default=100,
    )

    WORKFLOW_NODE_EXECUTION_STORAGE: str = Field(
        default="rdbms",
        description="Storage backend for WorkflowNodeExecution. Options: 'rdbms', 'hybrid'",
    )


class AuthConfig(BaseSettings):
    """

@@ -320,10 +320,9 @@ class AdvancedChatAppGenerateTaskPipeline:
                    session=session, workflow_run_id=self._workflow_run_id
                )
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried(
                    session=session, workflow_run=workflow_run, event=event
                    workflow_run=workflow_run, event=event
                )
                node_retry_resp = self._workflow_cycle_manager._workflow_node_retry_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,

@@ -341,11 +340,10 @@ class AdvancedChatAppGenerateTaskPipeline:
                    session=session, workflow_run_id=self._workflow_run_id
                )
                workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start(
                    session=session, workflow_run=workflow_run, event=event
                    workflow_run=workflow_run, event=event
                )

                node_start_resp = self._workflow_cycle_manager._workflow_node_start_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,

@@ -363,11 +361,10 @@ class AdvancedChatAppGenerateTaskPipeline:

            with Session(db.engine, expire_on_commit=False) as session:
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success(
                    session=session, event=event
                    event=event
                )

                node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,

@@ -383,18 +380,15 @@ class AdvancedChatAppGenerateTaskPipeline:
            | QueueNodeInLoopFailedEvent
            | QueueNodeExceptionEvent,
        ):
            with Session(db.engine, expire_on_commit=False) as session:
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_failed(
                    session=session, event=event
                    event=event
                )

                node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
                session.commit()

            if node_finish_resp:
                yield node_finish_resp

@@ -279,10 +279,9 @@ class WorkflowAppGenerateTaskPipeline:
                    session=session, workflow_run_id=self._workflow_run_id
                )
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried(
                    session=session, workflow_run=workflow_run, event=event
                    workflow_run=workflow_run, event=event
                )
                response = self._workflow_cycle_manager._workflow_node_retry_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,

@@ -300,10 +299,9 @@ class WorkflowAppGenerateTaskPipeline:
                    session=session, workflow_run_id=self._workflow_run_id
                )
                workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start(
                    session=session, workflow_run=workflow_run, event=event
                    workflow_run=workflow_run, event=event
                )
                node_start_response = self._workflow_cycle_manager._workflow_node_start_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,

@@ -313,17 +311,14 @@ class WorkflowAppGenerateTaskPipeline:
            if node_start_response:
                yield node_start_response
        elif isinstance(event, QueueNodeSucceededEvent):
            with Session(db.engine, expire_on_commit=False) as session:
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success(
                    session=session, event=event
                    event=event
                )
                node_success_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
                session.commit()

            if node_success_response:
                yield node_success_response

@@ -334,18 +329,14 @@ class WorkflowAppGenerateTaskPipeline:
            | QueueNodeInLoopFailedEvent
            | QueueNodeExceptionEvent,
        ):
            with Session(db.engine, expire_on_commit=False) as session:
                workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_failed(
                    session=session,
                    event=event,
                )
                node_failed_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response(
                    session=session,
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
                session.commit()

            if node_failed_response:
                yield node_failed_response

@@ -627,6 +618,7 @@ class WorkflowAppGenerateTaskPipeline:
        workflow_app_log.created_by = self._user_id

        session.add(workflow_app_log)
        session.commit()

    def _text_chunk_to_stream_response(
        self, text: str, from_variable_selector: Optional[list[str]] = None

@@ -6,7 +6,7 @@ from typing import Any, Optional, Union, cast
from uuid import uuid4

from sqlalchemy import func, select
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, sessionmaker

from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity
from core.app.entities.queue_entities import (

@@ -49,12 +49,14 @@ from core.file import FILE_MODEL_IDENTITY, File
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.repository import RepositoryFactory
from core.tools.tool_manager import ToolManager
from core.workflow.entities.node_entities import NodeRunMetadataKey
from core.workflow.enums import SystemVariableKey
from core.workflow.nodes import NodeType
from core.workflow.nodes.tool.entities import ToolNodeData
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.account import Account
from models.enums import CreatedByRole, WorkflowRunTriggeredFrom
from models.model import EndUser

@@ -80,6 +82,21 @@ class WorkflowCycleManage:
        self._application_generate_entity = application_generate_entity
        self._workflow_system_variables = workflow_system_variables

        # Initialize the session factory and repository
        # We use the global db engine instead of the session passed to methods
        # Disable expire_on_commit to avoid the need for merging objects
        self._session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
        self._workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository(
            params={
                "tenant_id": self._application_generate_entity.app_config.tenant_id,
                "app_id": self._application_generate_entity.app_config.app_id,
                "session_factory": self._session_factory,
            }
        )

        # We'll still keep the cache for backward compatibility and performance
        # but use the repository for database operations

    def _handle_workflow_run_start(
        self,
        *,

@@ -254,19 +271,15 @@ class WorkflowCycleManage:
        workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None)
        workflow_run.exceptions_count = exceptions_count

        stmt = select(WorkflowNodeExecution.node_execution_id).where(
            WorkflowNodeExecution.tenant_id == workflow_run.tenant_id,
            WorkflowNodeExecution.app_id == workflow_run.app_id,
            WorkflowNodeExecution.workflow_id == workflow_run.workflow_id,
            WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value,
            WorkflowNodeExecution.workflow_run_id == workflow_run.id,
            WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING.value,
        # Use the instance repository to find running executions for a workflow run
        running_workflow_node_executions = self._workflow_node_execution_repository.get_running_executions(
            workflow_run_id=workflow_run.id
        )
        ids = session.scalars(stmt).all()
        # Use self._get_workflow_node_execution here to make sure the cache is updated
        running_workflow_node_executions = [
            self._get_workflow_node_execution(session=session, node_execution_id=id) for id in ids if id
        ]

        # Update the cache with the retrieved executions
        for execution in running_workflow_node_executions:
            if execution.node_execution_id:
                self._workflow_node_executions[execution.node_execution_id] = execution

        for workflow_node_execution in running_workflow_node_executions:
            now = datetime.now(UTC).replace(tzinfo=None)

@@ -288,7 +301,7 @@ class WorkflowCycleManage:
        return workflow_run

    def _handle_node_execution_start(
        self, *, session: Session, workflow_run: WorkflowRun, event: QueueNodeStartedEvent
        self, *, workflow_run: WorkflowRun, event: QueueNodeStartedEvent
    ) -> WorkflowNodeExecution:
        workflow_node_execution = WorkflowNodeExecution()
        workflow_node_execution.id = str(uuid4())

@@ -315,17 +328,14 @@ class WorkflowCycleManage:
        )
        workflow_node_execution.created_at = datetime.now(UTC).replace(tzinfo=None)

        session.add(workflow_node_execution)
        # Use the instance repository to save the workflow node execution
        self._workflow_node_execution_repository.save(workflow_node_execution)

        self._workflow_node_executions[event.node_execution_id] = workflow_node_execution
        return workflow_node_execution

    def _handle_workflow_node_execution_success(
        self, *, session: Session, event: QueueNodeSucceededEvent
    ) -> WorkflowNodeExecution:
        workflow_node_execution = self._get_workflow_node_execution(
            session=session, node_execution_id=event.node_execution_id
        )
    def _handle_workflow_node_execution_success(self, *, event: QueueNodeSucceededEvent) -> WorkflowNodeExecution:
        workflow_node_execution = self._get_workflow_node_execution(node_execution_id=event.node_execution_id)
        inputs = WorkflowEntry.handle_special_values(event.inputs)
        process_data = WorkflowEntry.handle_special_values(event.process_data)
        outputs = WorkflowEntry.handle_special_values(event.outputs)

@@ -344,13 +354,13 @@ class WorkflowCycleManage:
        workflow_node_execution.finished_at = finished_at
        workflow_node_execution.elapsed_time = elapsed_time

        workflow_node_execution = session.merge(workflow_node_execution)
        # Use the instance repository to update the workflow node execution
        self._workflow_node_execution_repository.update(workflow_node_execution)
        return workflow_node_execution

    def _handle_workflow_node_execution_failed(
        self,
        *,
        session: Session,
        event: QueueNodeFailedEvent
        | QueueNodeInIterationFailedEvent
        | QueueNodeInLoopFailedEvent

@@ -361,9 +371,7 @@ class WorkflowCycleManage:
        :param event: queue node failed event
        :return:
        """
        workflow_node_execution = self._get_workflow_node_execution(
            session=session, node_execution_id=event.node_execution_id
        )
        workflow_node_execution = self._get_workflow_node_execution(node_execution_id=event.node_execution_id)

        inputs = WorkflowEntry.handle_special_values(event.inputs)
        process_data = WorkflowEntry.handle_special_values(event.process_data)

@@ -387,14 +395,14 @@ class WorkflowCycleManage:
        workflow_node_execution.elapsed_time = elapsed_time
        workflow_node_execution.execution_metadata = execution_metadata

        workflow_node_execution = session.merge(workflow_node_execution)
        return workflow_node_execution

    def _handle_workflow_node_execution_retried(
        self, *, session: Session, workflow_run: WorkflowRun, event: QueueNodeRetryEvent
        self, *, workflow_run: WorkflowRun, event: QueueNodeRetryEvent
    ) -> WorkflowNodeExecution:
        """
        Workflow node execution failed
        :param workflow_run: workflow run
        :param event: queue node failed event
        :return:
        """

@@ -439,15 +447,12 @@ class WorkflowCycleManage:
        workflow_node_execution.execution_metadata = execution_metadata
        workflow_node_execution.index = event.node_run_index

        session.add(workflow_node_execution)
        # Use the instance repository to save the workflow node execution
        self._workflow_node_execution_repository.save(workflow_node_execution)

        self._workflow_node_executions[event.node_execution_id] = workflow_node_execution
        return workflow_node_execution

    #################################################
    # to stream responses #
    #################################################

    def _workflow_start_to_stream_response(
        self,
        *,

@@ -455,7 +460,6 @@ class WorkflowCycleManage:
        task_id: str,
        workflow_run: WorkflowRun,
    ) -> WorkflowStartStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return WorkflowStartStreamResponse(
            task_id=task_id,

@@ -521,14 +525,10 @@ class WorkflowCycleManage:
    def _workflow_node_start_to_stream_response(
        self,
        *,
        session: Session,
        event: QueueNodeStartedEvent,
        task_id: str,
        workflow_node_execution: WorkflowNodeExecution,
    ) -> Optional[NodeStartStreamResponse]:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session

        if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}:
            return None
        if not workflow_node_execution.workflow_run_id:

@@ -571,7 +571,6 @@ class WorkflowCycleManage:
    def _workflow_node_finish_to_stream_response(
        self,
        *,
        session: Session,
        event: QueueNodeSucceededEvent
        | QueueNodeFailedEvent
        | QueueNodeInIterationFailedEvent

@@ -580,8 +579,6 @@ class WorkflowCycleManage:
        task_id: str,
        workflow_node_execution: WorkflowNodeExecution,
    ) -> Optional[NodeFinishStreamResponse]:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}:
            return None
        if not workflow_node_execution.workflow_run_id:

@@ -621,13 +618,10 @@ class WorkflowCycleManage:
    def _workflow_node_retry_to_stream_response(
        self,
        *,
        session: Session,
        event: QueueNodeRetryEvent,
        task_id: str,
        workflow_node_execution: WorkflowNodeExecution,
    ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}:
            return None
        if not workflow_node_execution.workflow_run_id:

@@ -668,7 +662,6 @@ class WorkflowCycleManage:
    def _workflow_parallel_branch_start_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunStartedEvent
    ) -> ParallelBranchStartStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return ParallelBranchStartStreamResponse(
            task_id=task_id,

@@ -692,7 +685,6 @@ class WorkflowCycleManage:
        workflow_run: WorkflowRun,
        event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent,
    ) -> ParallelBranchFinishedStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return ParallelBranchFinishedStreamResponse(
            task_id=task_id,

@@ -713,7 +705,6 @@ class WorkflowCycleManage:
    def _workflow_iteration_start_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationStartEvent
    ) -> IterationNodeStartStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return IterationNodeStartStreamResponse(
            task_id=task_id,

@@ -735,7 +726,6 @@ class WorkflowCycleManage:
    def _workflow_iteration_next_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent
    ) -> IterationNodeNextStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return IterationNodeNextStreamResponse(
            task_id=task_id,

@@ -759,7 +749,6 @@ class WorkflowCycleManage:
    def _workflow_iteration_completed_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent
    ) -> IterationNodeCompletedStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return IterationNodeCompletedStreamResponse(
            task_id=task_id,

@@ -790,7 +779,6 @@ class WorkflowCycleManage:
    def _workflow_loop_start_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopStartEvent
    ) -> LoopNodeStartStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return LoopNodeStartStreamResponse(
            task_id=task_id,

@@ -812,7 +800,6 @@ class WorkflowCycleManage:
    def _workflow_loop_next_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopNextEvent
    ) -> LoopNodeNextStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return LoopNodeNextStreamResponse(
            task_id=task_id,

@@ -836,7 +823,6 @@ class WorkflowCycleManage:
    def _workflow_loop_completed_to_stream_response(
        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopCompletedEvent
    ) -> LoopNodeCompletedStreamResponse:
        # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this
        _ = session
        return LoopNodeCompletedStreamResponse(
            task_id=task_id,

@@ -934,11 +920,22 @@ class WorkflowCycleManage:

        return workflow_run

    def _get_workflow_node_execution(self, session: Session, node_execution_id: str) -> WorkflowNodeExecution:
        if node_execution_id not in self._workflow_node_executions:
    def _get_workflow_node_execution(self, node_execution_id: str) -> WorkflowNodeExecution:
        # First check the cache for performance
        if node_execution_id in self._workflow_node_executions:
            cached_execution = self._workflow_node_executions[node_execution_id]
            # No need to merge with session since expire_on_commit=False
            return cached_execution

        # If not in cache, use the instance repository to get by node_execution_id
        execution = self._workflow_node_execution_repository.get_by_node_execution_id(node_execution_id)

        if not execution:
            raise ValueError(f"Workflow node execution not found: {node_execution_id}")
            cached_workflow_node_execution = self._workflow_node_executions[node_execution_id]
            return session.merge(cached_workflow_node_execution)

        # Update cache
        self._workflow_node_executions[node_execution_id] = execution
        return execution

    def _handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse:
        """

@@ -5,6 +5,7 @@ from datetime import datetime, timedelta
from typing import Optional

from langfuse import Langfuse  # type: ignore
from sqlalchemy.orm import sessionmaker

from core.ops.base_trace_instance import BaseTraceInstance
from core.ops.entities.config_entity import LangfuseConfig

@@ -28,9 +29,9 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import (
    UnitEnum,
)
from core.ops.utils import filter_none_values
from core.repository.repository_factory import RepositoryFactory
from extensions.ext_database import db
from models.model import EndUser
from models.workflow import WorkflowNodeExecution

logger = logging.getLogger(__name__)

@@ -110,36 +111,18 @@ class LangFuseDataTrace(BaseTraceInstance):
            )
            self.add_trace(langfuse_trace_data=trace_data)

        # through workflow_run_id get all_nodes_execution
        workflow_nodes_execution_id_records = (
            db.session.query(WorkflowNodeExecution.id)
            .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
            .all()
        # through workflow_run_id get all_nodes_execution using repository
        session_factory = sessionmaker(bind=db.engine)
        workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository(
            params={"tenant_id": trace_info.tenant_id, "session_factory": session_factory},
        )

        for node_execution_id_record in workflow_nodes_execution_id_records:
            node_execution = (
                db.session.query(
                    WorkflowNodeExecution.id,
                    WorkflowNodeExecution.tenant_id,
                    WorkflowNodeExecution.app_id,
                    WorkflowNodeExecution.title,
                    WorkflowNodeExecution.node_type,
                    WorkflowNodeExecution.status,
                    WorkflowNodeExecution.inputs,
                    WorkflowNodeExecution.outputs,
                    WorkflowNodeExecution.created_at,
                    WorkflowNodeExecution.elapsed_time,
                    WorkflowNodeExecution.process_data,
                    WorkflowNodeExecution.execution_metadata,
                )
                .filter(WorkflowNodeExecution.id == node_execution_id_record.id)
                .first()
        # Get all executions for this workflow run
        workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run(
            workflow_run_id=trace_info.workflow_run_id
        )

            if not node_execution:
                continue

        for node_execution in workflow_node_executions:
            node_execution_id = node_execution.id
            tenant_id = node_execution.tenant_id
            app_id = node_execution.app_id

@@ -7,6 +7,7 @@ from typing import Optional, cast

from langsmith import Client
from langsmith.schemas import RunBase
from sqlalchemy.orm import sessionmaker

from core.ops.base_trace_instance import BaseTraceInstance
from core.ops.entities.config_entity import LangSmithConfig

@@ -27,9 +28,9 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import (
    LangSmithRunUpdateModel,
)
from core.ops.utils import filter_none_values, generate_dotted_order
from core.repository.repository_factory import RepositoryFactory
from extensions.ext_database import db
from models.model import EndUser, MessageFile
from models.workflow import WorkflowNodeExecution

logger = logging.getLogger(__name__)

@@ -134,36 +135,22 @@ class LangSmithDataTrace(BaseTraceInstance):

        self.add_run(langsmith_run)

        # through workflow_run_id get all_nodes_execution
        workflow_nodes_execution_id_records = (
            db.session.query(WorkflowNodeExecution.id)
            .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
            .all()
        # through workflow_run_id get all_nodes_execution using repository
        session_factory = sessionmaker(bind=db.engine)
        workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository(
            params={
                "tenant_id": trace_info.tenant_id,
                "app_id": trace_info.metadata.get("app_id"),
                "session_factory": session_factory,
            },
        )

        for node_execution_id_record in workflow_nodes_execution_id_records:
            node_execution = (
                db.session.query(
                    WorkflowNodeExecution.id,
                    WorkflowNodeExecution.tenant_id,
                    WorkflowNodeExecution.app_id,
                    WorkflowNodeExecution.title,
                    WorkflowNodeExecution.node_type,
                    WorkflowNodeExecution.status,
                    WorkflowNodeExecution.inputs,
                    WorkflowNodeExecution.outputs,
                    WorkflowNodeExecution.created_at,
                    WorkflowNodeExecution.elapsed_time,
                    WorkflowNodeExecution.process_data,
                    WorkflowNodeExecution.execution_metadata,
                )
                .filter(WorkflowNodeExecution.id == node_execution_id_record.id)
                .first()
        # Get all executions for this workflow run
        workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run(
            workflow_run_id=trace_info.workflow_run_id
        )

            if not node_execution:
                continue

        for node_execution in workflow_node_executions:
            node_execution_id = node_execution.id
            tenant_id = node_execution.tenant_id
            app_id = node_execution.app_id

@@ -7,6 +7,7 @@ from typing import Optional, cast

from opik import Opik, Trace
from opik.id_helpers import uuid4_to_uuid7
from sqlalchemy.orm import sessionmaker

from core.ops.base_trace_instance import BaseTraceInstance
from core.ops.entities.config_entity import OpikConfig

@@ -21,9 +22,9 @@ from core.ops.entities.trace_entity import (
    TraceTaskName,
    WorkflowTraceInfo,
)
from core.repository.repository_factory import RepositoryFactory
from extensions.ext_database import db
from models.model import EndUser, MessageFile
from models.workflow import WorkflowNodeExecution

logger = logging.getLogger(__name__)

@@ -147,36 +148,22 @@ class OpikDataTrace(BaseTraceInstance):
            }
            self.add_trace(trace_data)

        # through workflow_run_id get all_nodes_execution
        workflow_nodes_execution_id_records = (
            db.session.query(WorkflowNodeExecution.id)
            .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
            .all()
        # through workflow_run_id get all_nodes_execution using repository
        session_factory = sessionmaker(bind=db.engine)
        workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository(
            params={
                "tenant_id": trace_info.tenant_id,
                "app_id": trace_info.metadata.get("app_id"),
                "session_factory": session_factory,
            },
        )

        for node_execution_id_record in workflow_nodes_execution_id_records:
            node_execution = (
                db.session.query(
                    WorkflowNodeExecution.id,
                    WorkflowNodeExecution.tenant_id,
                    WorkflowNodeExecution.app_id,
                    WorkflowNodeExecution.title,
                    WorkflowNodeExecution.node_type,
                    WorkflowNodeExecution.status,
                    WorkflowNodeExecution.inputs,
                    WorkflowNodeExecution.outputs,
                    WorkflowNodeExecution.created_at,
                    WorkflowNodeExecution.elapsed_time,
                    WorkflowNodeExecution.process_data,
                    WorkflowNodeExecution.execution_metadata,
                )
                .filter(WorkflowNodeExecution.id == node_execution_id_record.id)
                .first()
        # Get all executions for this workflow run
        workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run(
            workflow_run_id=trace_info.workflow_run_id
        )

            if not node_execution:
                continue

        for node_execution in workflow_node_executions:
            node_execution_id = node_execution.id
            tenant_id = node_execution.tenant_id
            app_id = node_execution.app_id

@@ -0,0 +1,15 @@
"""
Repository interfaces for data access.

This package contains repository interfaces that define the contract
for accessing and manipulating data, regardless of the underlying
storage mechanism.
"""

from core.repository.repository_factory import RepositoryFactory
from core.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository

__all__ = [
    "RepositoryFactory",
    "WorkflowNodeExecutionRepository",
]

@@ -0,0 +1,97 @@
"""
Repository factory for creating repository instances.

This module provides a simple factory interface for creating repository instances.
It does not contain any implementation details or dependencies on specific repositories.
"""

from collections.abc import Callable, Mapping
from typing import Any, Literal, Optional, cast

from core.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository

# Type for factory functions - takes a dict of parameters and returns any repository type
RepositoryFactoryFunc = Callable[[Mapping[str, Any]], Any]

# Type for workflow node execution factory function
WorkflowNodeExecutionFactoryFunc = Callable[[Mapping[str, Any]], WorkflowNodeExecutionRepository]

# Repository type literals
RepositoryType = Literal["workflow_node_execution"]


class RepositoryFactory:
    """
    Factory class for creating repository instances.

    This factory delegates the actual repository creation to implementation-specific
    factory functions that are registered with the factory at runtime.
    """

    # Dictionary to store factory functions
    _factory_functions: dict[str, RepositoryFactoryFunc] = {}

    @classmethod
    def _register_factory(cls, repository_type: RepositoryType, factory_func: RepositoryFactoryFunc) -> None:
        """
        Register a factory function for a specific repository type.
        This is a private method and should not be called directly.

        Args:
            repository_type: The type of repository (e.g., 'workflow_node_execution')
            factory_func: A function that takes parameters and returns a repository instance
        """
        cls._factory_functions[repository_type] = factory_func

    @classmethod
    def _create_repository(cls, repository_type: RepositoryType, params: Optional[Mapping[str, Any]] = None) -> Any:
        """
        Create a new repository instance with the provided parameters.
        This is a private method and should not be called directly.

        Args:
            repository_type: The type of repository to create
            params: A dictionary of parameters to pass to the factory function

        Returns:
            A new instance of the requested repository

        Raises:
            ValueError: If no factory function is registered for the repository type
        """
        if repository_type not in cls._factory_functions:
            raise ValueError(f"No factory function registered for repository type '{repository_type}'")

        # Use empty dict if params is None
        params = params or {}

        return cls._factory_functions[repository_type](params)

    @classmethod
    def register_workflow_node_execution_factory(cls, factory_func: WorkflowNodeExecutionFactoryFunc) -> None:
        """
        Register a factory function for the workflow node execution repository.

        Args:
            factory_func: A function that takes parameters and returns a WorkflowNodeExecutionRepository instance
        """
        cls._register_factory("workflow_node_execution", factory_func)

    @classmethod
    def create_workflow_node_execution_repository(
        cls, params: Optional[Mapping[str, Any]] = None
    ) -> WorkflowNodeExecutionRepository:
        """
        Create a new WorkflowNodeExecutionRepository instance with the provided parameters.

        Args:
            params: A dictionary of parameters to pass to the factory function

        Returns:
            A new instance of the WorkflowNodeExecutionRepository

        Raises:
            ValueError: If no factory function is registered for the workflow_node_execution repository type
        """
        # We can safely cast here because we've registered a WorkflowNodeExecutionFactoryFunc
        return cast(WorkflowNodeExecutionRepository, cls._create_repository("workflow_node_execution", params))
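
In practice the factory is consumed the same way WorkflowCycleManage does above: build a sessionmaker, then ask the factory for a repository. A minimal usage sketch (IDs below are placeholders, not values from this commit):

from sqlalchemy.orm import sessionmaker

from core.repository import RepositoryFactory
from extensions.ext_database import db

# Assumes register_repositories() already ran during app initialization (see ext_repositories below).
session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = RepositoryFactory.create_workflow_node_execution_repository(
    params={
        "tenant_id": "tenant-1",  # placeholder; required by the RDBMS implementation
        "app_id": "app-1",  # placeholder; optional filter
        "session_factory": session_factory,
    }
)
execution = repo.get_by_node_execution_id("node-exec-1")  # placeholder ID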

@@ -0,0 +1,88 @@
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Literal, Optional, Protocol

from models.workflow import WorkflowNodeExecution


@dataclass
class OrderConfig:
    """Configuration for ordering WorkflowNodeExecution instances."""

    order_by: list[str]
    order_direction: Optional[Literal["asc", "desc"]] = None


class WorkflowNodeExecutionRepository(Protocol):
    """
    Repository interface for WorkflowNodeExecution.

    This interface defines the contract for accessing and manipulating
    WorkflowNodeExecution data, regardless of the underlying storage mechanism.

    Note: Domain-specific concepts like multi-tenancy (tenant_id), application context (app_id),
    and trigger sources (triggered_from) should be handled at the implementation level, not in
    the core interface. This keeps the core domain model clean and independent of specific
    application domains or deployment scenarios.
    """

    def save(self, execution: WorkflowNodeExecution) -> None:
        """
        Save a WorkflowNodeExecution instance.

        Args:
            execution: The WorkflowNodeExecution instance to save
        """
        ...

    def get_by_node_execution_id(self, node_execution_id: str) -> Optional[WorkflowNodeExecution]:
        """
        Retrieve a WorkflowNodeExecution by its node_execution_id.

        Args:
            node_execution_id: The node execution ID

        Returns:
            The WorkflowNodeExecution instance if found, None otherwise
        """
        ...

    def get_by_workflow_run(
        self,
        workflow_run_id: str,
        order_config: Optional[OrderConfig] = None,
    ) -> Sequence[WorkflowNodeExecution]:
        """
        Retrieve all WorkflowNodeExecution instances for a specific workflow run.

        Args:
            workflow_run_id: The workflow run ID
            order_config: Optional configuration for ordering results
                order_config.order_by: List of fields to order by (e.g., ["index", "created_at"])
                order_config.order_direction: Direction to order ("asc" or "desc")

        Returns:
            A list of WorkflowNodeExecution instances
        """
        ...

    def get_running_executions(self, workflow_run_id: str) -> Sequence[WorkflowNodeExecution]:
        """
        Retrieve all running WorkflowNodeExecution instances for a specific workflow run.

        Args:
            workflow_run_id: The workflow run ID

        Returns:
            A list of running WorkflowNodeExecution instances
        """
        ...

    def update(self, execution: WorkflowNodeExecution) -> None:
        """
        Update an existing WorkflowNodeExecution instance.

        Args:
            execution: The WorkflowNodeExecution instance to update
        """
        ...
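
Because the interface is a typing.Protocol, implementations satisfy it structurally; no inheritance is required (note that SQLAlchemyWorkflowNodeExecutionRepository below subclasses nothing). A hypothetical in-memory sketch, not part of this commit, that would type-check against the protocol:

from collections.abc import Sequence
from typing import Optional

from models.workflow import WorkflowNodeExecution


class InMemoryWorkflowNodeExecutionRepository:
    """Hypothetical in-memory implementation; matches the protocol by structure alone."""

    def __init__(self, tenant_id: str):
        self._tenant_id = tenant_id
        self._store: dict[str, WorkflowNodeExecution] = {}

    def save(self, execution: WorkflowNodeExecution) -> None:
        self._store[execution.node_execution_id] = execution

    def get_by_node_execution_id(self, node_execution_id: str) -> Optional[WorkflowNodeExecution]:
        return self._store.get(node_execution_id)

    def get_by_workflow_run(self, workflow_run_id: str, order_config=None) -> Sequence[WorkflowNodeExecution]:
        # order_config is ignored in this sketch
        return [e for e in self._store.values() if e.workflow_run_id == workflow_run_id]

    def get_running_executions(self, workflow_run_id: str) -> Sequence[WorkflowNodeExecution]:
        return [e for e in self.get_by_workflow_run(workflow_run_id) if e.status == "running"]

    def update(self, execution: WorkflowNodeExecution) -> None:
        self._store[execution.node_execution_id] = execution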

@@ -0,0 +1,18 @@
"""
Extension for initializing repositories.

This extension registers repository implementations with the RepositoryFactory.
"""

from dify_app import DifyApp
from repositories.repository_registry import register_repositories


def init_app(_app: DifyApp) -> None:
    """
    Initialize repository implementations.

    Args:
        _app: The Flask application instance (unused)
    """
    register_repositories()

@@ -73,11 +73,7 @@ class Storage:
        raise ValueError(f"unsupported storage type {storage_type}")

    def save(self, filename, data):
        try:
            self.storage_runner.save(filename, data)
        except Exception as e:
            logger.exception(f"Failed to save file {filename}")
            raise e

    @overload
    def load(self, filename: str, /, *, stream: Literal[False] = False) -> bytes: ...

@@ -86,49 +82,25 @@ class Storage:
    def load(self, filename: str, /, *, stream: Literal[True]) -> Generator: ...

    def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
        try:
            if stream:
                return self.load_stream(filename)
            else:
                return self.load_once(filename)
        except Exception as e:
            logger.exception(f"Failed to load file {filename}")
            raise e

    def load_once(self, filename: str) -> bytes:
        try:
            return self.storage_runner.load_once(filename)
        except Exception as e:
            logger.exception(f"Failed to load_once file {filename}")
            raise e

    def load_stream(self, filename: str) -> Generator:
        try:
            return self.storage_runner.load_stream(filename)
        except Exception as e:
            logger.exception(f"Failed to load_stream file {filename}")
            raise e

    def download(self, filename, target_filepath):
        try:
            self.storage_runner.download(filename, target_filepath)
        except Exception as e:
            logger.exception(f"Failed to download file {filename}")
            raise e

    def exists(self, filename):
        try:
            return self.storage_runner.exists(filename)
        except Exception as e:
            logger.exception(f"Failed to check file exists {filename}")
            raise e

    def delete(self, filename):
        try:
            return self.storage_runner.delete(filename)
        except Exception as e:
            logger.exception(f"Failed to delete file {filename}")
            raise e


storage = Storage()

@@ -510,7 +510,7 @@ class WorkflowRun(Base):
    )


class WorkflowNodeExecutionTriggeredFrom(Enum):
class WorkflowNodeExecutionTriggeredFrom(StrEnum):
    """
    Workflow Node Execution Triggered From Enum
    """

@@ -518,21 +518,8 @@ class WorkflowNodeExecutionTriggeredFrom(Enum):
    SINGLE_STEP = "single-step"
    WORKFLOW_RUN = "workflow-run"

    @classmethod
    def value_of(cls, value: str) -> "WorkflowNodeExecutionTriggeredFrom":
        """
        Get value of given mode.

        :param value: mode value
        :return: mode
        """
        for mode in cls:
            if mode.value == value:
                return mode
        raise ValueError(f"invalid workflow node execution triggered from value {value}")


class WorkflowNodeExecutionStatus(Enum):
class WorkflowNodeExecutionStatus(StrEnum):
    """
    Workflow Node Execution Status Enum
    """

@@ -543,19 +530,6 @@ class WorkflowNodeExecutionStatus(Enum):
    EXCEPTION = "exception"
    RETRY = "retry"

    @classmethod
    def value_of(cls, value: str) -> "WorkflowNodeExecutionStatus":
        """
        Get value of given mode.

        :param value: mode value
        :return: mode
        """
        for mode in cls:
            if mode.value == value:
                return mode
        raise ValueError(f"invalid workflow node execution status value {value}")


class WorkflowNodeExecution(Base):
    """
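
Moving these enums from Enum to StrEnum is what makes the value_of() helpers removable: members are now str subclasses, so value lookup and direct string comparison come for free. An illustrative snippet (a trimmed stand-in for the real enum, not code from this commit):

from enum import StrEnum


class NodeStatus(StrEnum):  # trimmed stand-in for WorkflowNodeExecutionStatus
    RUNNING = "running"
    SUCCEEDED = "succeeded"


assert NodeStatus.RUNNING == "running"  # StrEnum members compare equal to their string values
assert NodeStatus("succeeded") is NodeStatus.SUCCEEDED  # plain Enum lookup replaces value_of()
# This is also why the .value suffixes can be dropped in SQLAlchemy filters, e.g.
# WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING in the repository below.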

@@ -0,0 +1,6 @@
"""
Repository implementations for data access.

This package contains concrete implementations of the repository interfaces
defined in the core.repository package.
"""

@@ -0,0 +1,87 @@
"""
Registry for repository implementations.

This module is responsible for registering factory functions with the repository factory.
"""

import logging
from collections.abc import Mapping
from typing import Any

from sqlalchemy.orm import sessionmaker

from configs import dify_config
from core.repository.repository_factory import RepositoryFactory
from extensions.ext_database import db
from repositories.workflow_node_execution import SQLAlchemyWorkflowNodeExecutionRepository

logger = logging.getLogger(__name__)

# Storage type constants
STORAGE_TYPE_RDBMS = "rdbms"
STORAGE_TYPE_HYBRID = "hybrid"


def register_repositories() -> None:
    """
    Register repository factory functions with the RepositoryFactory.

    This function reads configuration settings to determine which repository
    implementations to register.
    """
    # Configure WorkflowNodeExecutionRepository factory based on configuration
    workflow_node_execution_storage = dify_config.WORKFLOW_NODE_EXECUTION_STORAGE

    # Check storage type and register appropriate implementation
    if workflow_node_execution_storage == STORAGE_TYPE_RDBMS:
        # Register SQLAlchemy implementation for RDBMS storage
        logger.info("Registering WorkflowNodeExecution repository with RDBMS storage")
        RepositoryFactory.register_workflow_node_execution_factory(create_workflow_node_execution_repository)
    elif workflow_node_execution_storage == STORAGE_TYPE_HYBRID:
        # Hybrid storage is not yet implemented
        raise NotImplementedError("Hybrid storage for WorkflowNodeExecution repository is not yet implemented")
    else:
        # Unknown storage type
        raise ValueError(
            f"Unknown storage type '{workflow_node_execution_storage}' for WorkflowNodeExecution repository. "
            f"Supported types: {STORAGE_TYPE_RDBMS}"
        )


def create_workflow_node_execution_repository(params: Mapping[str, Any]) -> SQLAlchemyWorkflowNodeExecutionRepository:
    """
    Create a WorkflowNodeExecutionRepository instance using SQLAlchemy implementation.

    This factory function creates a repository for the RDBMS storage type.

    Args:
        params: Parameters for creating the repository, including:
            - tenant_id: Required. The tenant ID for multi-tenancy.
            - app_id: Optional. The application ID for filtering.
            - session_factory: Optional. A SQLAlchemy sessionmaker instance. If not provided,
              a new sessionmaker will be created using the global database engine.

    Returns:
        A WorkflowNodeExecutionRepository instance

    Raises:
        ValueError: If required parameters are missing
    """
    # Extract required parameters
    tenant_id = params.get("tenant_id")
    if tenant_id is None:
        raise ValueError("tenant_id is required for WorkflowNodeExecution repository with RDBMS storage")

    # Extract optional parameters
    app_id = params.get("app_id")

    # Use the session_factory from params if provided, otherwise create one using the global db engine
    session_factory = params.get("session_factory")
    if session_factory is None:
        # Create a sessionmaker using the same engine as the global db session
        session_factory = sessionmaker(bind=db.engine)

    # Create and return the repository
    return SQLAlchemyWorkflowNodeExecutionRepository(
        session_factory=session_factory, tenant_id=tenant_id, app_id=app_id
    )
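
Roughly what this wires up at startup, under the default configuration (a sketch of the flow, not new code in the commit):

# ext_repositories.init_app calls this during app initialization.
from repositories.repository_registry import register_repositories

register_repositories()  # with WORKFLOW_NODE_EXECUTION_STORAGE=rdbms, registers the SQLAlchemy factory
# WORKFLOW_NODE_EXECUTION_STORAGE=hybrid currently raises NotImplementedError,
# and any other value raises ValueError.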

@@ -0,0 +1,9 @@
"""
WorkflowNodeExecution repository implementations.
"""

from repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository

__all__ = [
    "SQLAlchemyWorkflowNodeExecutionRepository",
]

@@ -0,0 +1,170 @@
"""
SQLAlchemy implementation of the WorkflowNodeExecutionRepository.
"""

import logging
from collections.abc import Sequence
from typing import Optional

from sqlalchemy import UnaryExpression, asc, desc, select
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker

from core.repository.workflow_node_execution_repository import OrderConfig
from models.workflow import WorkflowNodeExecution, WorkflowNodeExecutionStatus, WorkflowNodeExecutionTriggeredFrom

logger = logging.getLogger(__name__)


class SQLAlchemyWorkflowNodeExecutionRepository:
    """
    SQLAlchemy implementation of the WorkflowNodeExecutionRepository interface.

    This implementation supports multi-tenancy by filtering operations based on tenant_id.
    Each method creates its own session, handles the transaction, and commits changes
    to the database. This prevents long-running connections in the workflow core.
    """

    def __init__(self, session_factory: sessionmaker | Engine, tenant_id: str, app_id: Optional[str] = None):
        """
        Initialize the repository with a SQLAlchemy sessionmaker or engine and tenant context.

        Args:
            session_factory: SQLAlchemy sessionmaker or engine for creating sessions
            tenant_id: Tenant ID for multi-tenancy
            app_id: Optional app ID for filtering by application
        """
        # If an engine is provided, create a sessionmaker from it
        if isinstance(session_factory, Engine):
            self._session_factory = sessionmaker(bind=session_factory)
        else:
            self._session_factory = session_factory

        self._tenant_id = tenant_id
        self._app_id = app_id

    def save(self, execution: WorkflowNodeExecution) -> None:
        """
        Save a WorkflowNodeExecution instance and commit changes to the database.

        Args:
            execution: The WorkflowNodeExecution instance to save
        """
        with self._session_factory() as session:
            # Ensure tenant_id is set
            if not execution.tenant_id:
                execution.tenant_id = self._tenant_id

            # Set app_id if provided and not already set
            if self._app_id and not execution.app_id:
                execution.app_id = self._app_id

            session.add(execution)
            session.commit()

    def get_by_node_execution_id(self, node_execution_id: str) -> Optional[WorkflowNodeExecution]:
        """
        Retrieve a WorkflowNodeExecution by its node_execution_id.

        Args:
            node_execution_id: The node execution ID

        Returns:
            The WorkflowNodeExecution instance if found, None otherwise
        """
        with self._session_factory() as session:
            stmt = select(WorkflowNodeExecution).where(
                WorkflowNodeExecution.node_execution_id == node_execution_id,
                WorkflowNodeExecution.tenant_id == self._tenant_id,
            )

            if self._app_id:
                stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id)

            return session.scalar(stmt)

    def get_by_workflow_run(
        self,
        workflow_run_id: str,
        order_config: Optional[OrderConfig] = None,
    ) -> Sequence[WorkflowNodeExecution]:
        """
        Retrieve all WorkflowNodeExecution instances for a specific workflow run.

        Args:
            workflow_run_id: The workflow run ID
            order_config: Optional configuration for ordering results
                order_config.order_by: List of fields to order by (e.g., ["index", "created_at"])
                order_config.order_direction: Direction to order ("asc" or "desc")

        Returns:
            A list of WorkflowNodeExecution instances
        """
        with self._session_factory() as session:
            stmt = select(WorkflowNodeExecution).where(
                WorkflowNodeExecution.workflow_run_id == workflow_run_id,
                WorkflowNodeExecution.tenant_id == self._tenant_id,
                WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
            )

            if self._app_id:
                stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id)

            # Apply ordering if provided
            if order_config and order_config.order_by:
                order_columns: list[UnaryExpression] = []
                for field in order_config.order_by:
                    column = getattr(WorkflowNodeExecution, field, None)
                    if not column:
                        continue
                    if order_config.order_direction == "desc":
                        order_columns.append(desc(column))
                    else:
                        order_columns.append(asc(column))

                if order_columns:
                    stmt = stmt.order_by(*order_columns)

            return session.scalars(stmt).all()

    def get_running_executions(self, workflow_run_id: str) -> Sequence[WorkflowNodeExecution]:
        """
        Retrieve all running WorkflowNodeExecution instances for a specific workflow run.

        Args:
            workflow_run_id: The workflow run ID

        Returns:
            A list of running WorkflowNodeExecution instances
        """
        with self._session_factory() as session:
            stmt = select(WorkflowNodeExecution).where(
                WorkflowNodeExecution.workflow_run_id == workflow_run_id,
                WorkflowNodeExecution.tenant_id == self._tenant_id,
                WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING,
                WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
            )

            if self._app_id:
                stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id)

            return session.scalars(stmt).all()

    def update(self, execution: WorkflowNodeExecution) -> None:
        """
        Update an existing WorkflowNodeExecution instance and commit changes to the database.

        Args:
            execution: The WorkflowNodeExecution instance to update
        """
        with self._session_factory() as session:
            # Ensure tenant_id is set
            if not execution.tenant_id:
                execution.tenant_id = self._tenant_id

            # Set app_id if provided and not already set
            if self._app_id and not execution.app_id:
                execution.app_id = self._app_id

            session.merge(execution)
            session.commit()
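
The constructor accepts either a sessionmaker or a bare Engine, so the repository can be used standalone. A minimal usage sketch (the connection URL and IDs are placeholders):

from sqlalchemy import create_engine

from core.repository.workflow_node_execution_repository import OrderConfig
from repositories.workflow_node_execution import SQLAlchemyWorkflowNodeExecutionRepository

engine = create_engine("postgresql+psycopg2://user:pass@localhost/dify")  # placeholder URL
repo = SQLAlchemyWorkflowNodeExecutionRepository(session_factory=engine, tenant_id="tenant-1", app_id="app-1")

# Fetch all node executions of a run, ordered by their run index, newest first.
executions = repo.get_by_workflow_run(
    workflow_run_id="run-1",  # placeholder ID
    order_config=OrderConfig(order_by=["index"], order_direction="desc"),
)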

@@ -0,0 +1,3 @@
"""
Unit tests for repositories.
"""

@@ -0,0 +1,3 @@
"""
Unit tests for workflow_node_execution repositories.
"""

@@ -0,0 +1,154 @@
"""
Unit tests for the SQLAlchemy implementation of WorkflowNodeExecutionRepository.
"""

from unittest.mock import MagicMock

import pytest
from pytest_mock import MockerFixture
from sqlalchemy.orm import Session, sessionmaker

from core.repository.workflow_node_execution_repository import OrderConfig
from models.workflow import WorkflowNodeExecution
from repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository


@pytest.fixture
def session():
    """Create a mock SQLAlchemy session."""
    session = MagicMock(spec=Session)
    # Configure the session to be used as a context manager
    session.__enter__ = MagicMock(return_value=session)
    session.__exit__ = MagicMock(return_value=None)

    # Configure the session factory to return the session
    session_factory = MagicMock(spec=sessionmaker)
    session_factory.return_value = session
    return session, session_factory


@pytest.fixture
def repository(session):
    """Create a repository instance with test data."""
    _, session_factory = session
    tenant_id = "test-tenant"
    app_id = "test-app"
    return SQLAlchemyWorkflowNodeExecutionRepository(
        session_factory=session_factory, tenant_id=tenant_id, app_id=app_id
    )


def test_save(repository, session):
    """Test save method."""
    session_obj, _ = session
    # Create a mock execution
    execution = MagicMock(spec=WorkflowNodeExecution)
    execution.tenant_id = None
    execution.app_id = None

    # Call save method
    repository.save(execution)

    # Assert tenant_id and app_id are set
    assert execution.tenant_id == repository._tenant_id
    assert execution.app_id == repository._app_id

    # Assert session.add was called
    session_obj.add.assert_called_once_with(execution)


def test_save_with_existing_tenant_id(repository, session):
    """Test save method with existing tenant_id."""
    session_obj, _ = session
    # Create a mock execution with existing tenant_id
    execution = MagicMock(spec=WorkflowNodeExecution)
    execution.tenant_id = "existing-tenant"
    execution.app_id = None

    # Call save method
    repository.save(execution)

    # Assert tenant_id is not changed and app_id is set
    assert execution.tenant_id == "existing-tenant"
    assert execution.app_id == repository._app_id

    # Assert session.add was called
    session_obj.add.assert_called_once_with(execution)


def test_get_by_node_execution_id(repository, session, mocker: MockerFixture):
    """Test get_by_node_execution_id method."""
    session_obj, _ = session
    # Set up mock
    mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select")
    mock_stmt = mocker.MagicMock()
    mock_select.return_value = mock_stmt
    mock_stmt.where.return_value = mock_stmt
    session_obj.scalar.return_value = mocker.MagicMock(spec=WorkflowNodeExecution)

    # Call method
    result = repository.get_by_node_execution_id("test-node-execution-id")

    # Assert select was called with correct parameters
    mock_select.assert_called_once()
    session_obj.scalar.assert_called_once_with(mock_stmt)
    assert result is not None


def test_get_by_workflow_run(repository, session, mocker: MockerFixture):
    """Test get_by_workflow_run method."""
    session_obj, _ = session
    # Set up mock
    mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select")
    mock_stmt = mocker.MagicMock()
    mock_select.return_value = mock_stmt
    mock_stmt.where.return_value = mock_stmt
    mock_stmt.order_by.return_value = mock_stmt
    session_obj.scalars.return_value.all.return_value = [mocker.MagicMock(spec=WorkflowNodeExecution)]

    # Call method
    order_config = OrderConfig(order_by=["index"], order_direction="desc")
    result = repository.get_by_workflow_run(workflow_run_id="test-workflow-run-id", order_config=order_config)

    # Assert select was called with correct parameters
    mock_select.assert_called_once()
    session_obj.scalars.assert_called_once_with(mock_stmt)
    assert len(result) == 1


def test_get_running_executions(repository, session, mocker: MockerFixture):
    """Test get_running_executions method."""
    session_obj, _ = session
    # Set up mock
    mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select")
    mock_stmt = mocker.MagicMock()
    mock_select.return_value = mock_stmt
    mock_stmt.where.return_value = mock_stmt
    session_obj.scalars.return_value.all.return_value = [mocker.MagicMock(spec=WorkflowNodeExecution)]

    # Call method
    result = repository.get_running_executions("test-workflow-run-id")

    # Assert select was called with correct parameters
    mock_select.assert_called_once()
    session_obj.scalars.assert_called_once_with(mock_stmt)
    assert len(result) == 1


def test_update(repository, session):
    """Test update method."""
    session_obj, _ = session
    # Create a mock execution
    execution = MagicMock(spec=WorkflowNodeExecution)
    execution.tenant_id = None
    execution.app_id = None

    # Call update method
    repository.update(execution)

    # Assert tenant_id and app_id are set
    assert execution.tenant_id == repository._tenant_id
    assert execution.app_id == repository._app_id

    # Assert session.merge was called
    session_obj.merge.assert_called_once_with(execution)

@@ -744,6 +744,12 @@ MAX_VARIABLE_SIZE=204800
WORKFLOW_PARALLEL_DEPTH_LIMIT=3
WORKFLOW_FILE_UPLOAD_LIMIT=10

# Workflow storage configuration
# Options: rdbms, hybrid
# rdbms: Use only the relational database (default)
# hybrid: Save new data to object storage, read from both object storage and RDBMS
WORKFLOW_NODE_EXECUTION_STORAGE=rdbms

# HTTP request node in workflow configuration
HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576

@@ -327,6 +327,7 @@ x-shared-env: &shared-api-worker-env
  MAX_VARIABLE_SIZE: ${MAX_VARIABLE_SIZE:-204800}
  WORKFLOW_PARALLEL_DEPTH_LIMIT: ${WORKFLOW_PARALLEL_DEPTH_LIMIT:-3}
  WORKFLOW_FILE_UPLOAD_LIMIT: ${WORKFLOW_FILE_UPLOAD_LIMIT:-10}
  WORKFLOW_NODE_EXECUTION_STORAGE: ${WORKFLOW_NODE_EXECUTION_STORAGE:-rdbms}
  HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760}
  HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576}
  HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True}