feat: support single run doc extractor node (#11318)

This commit is contained in:
非法操作 2025-01-08 15:20:15 +08:00 committed by GitHub
parent 0a49d3dd52
commit d649037c3e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 103 additions and 3 deletions

View File

@ -5,7 +5,8 @@ import logging
import operator
import os
import tempfile
from typing import cast
from collections.abc import Mapping, Sequence
from typing import Any, cast
import docx
import pandas as pd
@ -81,6 +82,23 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]):
process_data=process_data,
)
@classmethod
def _extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    node_id: str,
    node_data: DocumentExtractorNodeData,
) -> Mapping[str, Sequence[str]]:
    """
    Build the variable-selector mapping for this node.

    :param graph_config: graph config (not read by this implementation)
    :param node_id: node id, used as the prefix of the mapping key
    :param node_data: node data providing ``variable_selector``
    :return: a single-entry mapping whose key is ``node_id`` followed by
        ``".files"`` and whose value is ``node_data.variable_selector``
    """
    files_key = node_id + ".files"
    return {files_key: node_data.variable_selector}
def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
"""Extract text from a file based on its MIME type."""

View File

@ -27,6 +27,7 @@ import VariableAssigner from '@/app/components/workflow/nodes/variable-assigner/
import Assigner from '@/app/components/workflow/nodes/assigner/default'
import ParameterExtractorDefault from '@/app/components/workflow/nodes/parameter-extractor/default'
import IterationDefault from '@/app/components/workflow/nodes/iteration/default'
import DocumentExtractorDefault from '@/app/components/workflow/nodes/document-extractor/default'
import { ssePost } from '@/service/base'
import { getInputVars as doGetInputVars } from '@/app/components/base/prompt-editor/constants'
@ -43,6 +44,7 @@ const { checkValid: checkVariableAssignerValid } = VariableAssigner
const { checkValid: checkAssignerValid } = Assigner
const { checkValid: checkParameterExtractorValid } = ParameterExtractorDefault
const { checkValid: checkIterationValid } = IterationDefault
const { checkValid: checkDocumentExtractorValid } = DocumentExtractorDefault
const checkValidFns: Record<BlockEnum, Function> = {
[BlockEnum.LLM]: checkLLMValid,
@ -57,6 +59,7 @@ const checkValidFns: Record<BlockEnum, Function> = {
[BlockEnum.VariableAggregator]: checkVariableAssignerValid,
[BlockEnum.ParameterExtractor]: checkParameterExtractorValid,
[BlockEnum.Iteration]: checkIterationValid,
[BlockEnum.DocExtractor]: checkDocumentExtractorValid,
} as any
type Params<T> = {

View File

@ -11,9 +11,11 @@ import useConfig from './use-config'
import type { DocExtractorNodeType } from './types'
import { fetchSupportFileTypes } from '@/service/datasets'
import Field from '@/app/components/workflow/nodes/_base/components/field'
import { BlockEnum, type NodePanelProps } from '@/app/components/workflow/types'
import { BlockEnum, InputVarType, type NodePanelProps } from '@/app/components/workflow/types'
import I18n from '@/context/i18n'
import { LanguagesSupported } from '@/i18n/language'
import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form'
import ResultPanel from '@/app/components/workflow/run/result-panel'
const i18nPrefix = 'workflow.nodes.docExtractor'
@ -46,6 +48,15 @@ const Panel: FC<NodePanelProps<DocExtractorNodeType>> = ({
inputs,
handleVarChanges,
filterVar,
// single run
isShowSingleRun,
hideSingleRun,
runningStatus,
handleRun,
handleStop,
runResult,
files,
setFiles,
} = useConfig(id, data)
return (
@ -81,6 +92,30 @@ const Panel: FC<NodePanelProps<DocExtractorNodeType>> = ({
/>
</OutputVars>
</div>
{
isShowSingleRun && (
<BeforeRunForm
nodeName={inputs.title}
onHide={hideSingleRun}
forms={[
{
inputs: [{
label: t(`${i18nPrefix}.inputVar`)!,
variable: 'files',
type: InputVarType.multiFiles,
required: true,
}],
values: { files },
onChange: keyValue => setFiles((keyValue as any).files),
},
]}
runningStatus={runningStatus}
onRun={handleRun}
onStop={handleStop}
result={<ResultPanel {...runResult} showSteps={false} />}
/>
)
}
</div>
)
}

View File

@ -3,9 +3,10 @@ import produce from 'immer'
import { useStoreApi } from 'reactflow'
import type { ValueSelector, Var } from '../../types'
import { VarType } from '../../types'
import { InputVarType, VarType } from '../../types'
import { type DocExtractorNodeType } from './types'
import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-crud'
import useOneStepRun from '@/app/components/workflow/nodes/_base/hooks/use-one-step-run'
import {
useIsChatMode,
useNodesReadOnly,
@ -55,11 +56,53 @@ const useConfig = (id: string, payload: DocExtractorNodeType) => {
setInputs(newInputs)
}, [getType, inputs, setInputs])
// single run
const {
isShowSingleRun,
hideSingleRun,
runningStatus,
isCompleted,
handleRun,
handleStop,
runInputData,
setRunInputData,
runResult,
} = useOneStepRun<DocExtractorNodeType>({
id,
data: inputs,
defaultRunInputData: { files: [] },
})
const varInputs = [{
label: inputs.title,
variable: 'files',
type: InputVarType.multiFiles,
required: true,
}]
const files = runInputData.files
// Replaces the `files` entry of the single-run input data while keeping the
// other entries of `runInputData` untouched.
// The parameter is typed as `unknown[]` rather than `[]`: in TypeScript `[]`
// is the empty tuple type, which would only accept zero-length arrays.
const setFiles = useCallback((newFiles: unknown[]) => {
  setRunInputData({
    ...runInputData,
    files: newFiles,
  })
}, [runInputData, setRunInputData])
return {
readOnly,
inputs,
filterVar,
handleVarChanges,
// single run
isShowSingleRun,
hideSingleRun,
runningStatus,
isCompleted,
handleRun,
handleStop,
varInputs,
files,
setFiles,
runResult,
}
}

View File

@ -382,6 +382,7 @@ export const canRunBySingle = (nodeType: BlockEnum) => {
|| nodeType === BlockEnum.Tool
|| nodeType === BlockEnum.ParameterExtractor
|| nodeType === BlockEnum.Iteration
|| nodeType === BlockEnum.DocExtractor
}
type ConnectedSourceOrTargetNodesChange = {