feat: support LLM process document file (#10966)

Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
非法操作 2024-11-22 19:32:44 +08:00 committed by GitHub
parent 556de444e8
commit 08ac36812b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 233 additions and 88 deletions

View File

@ -3,7 +3,6 @@ from typing import Optional
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.file import file_manager
from core.file.models import FileType
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
AssistantPromptMessage,
@ -103,12 +102,11 @@ class TokenBufferMemory:
prompt_message_contents: list[PromptMessageContent] = []
prompt_message_contents.append(TextPromptMessageContent(data=message.query))
for file in file_objs:
if file.type in {FileType.IMAGE, FileType.AUDIO}:
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)
prompt_messages.append(UserPromptMessage(content=prompt_message_contents))

View File

@ -49,7 +49,7 @@ class PromptMessageFunction(BaseModel):
function: PromptMessageTool
class PromptMessageContentType(Enum):
class PromptMessageContentType(str, Enum):
"""
Enum class for prompt message content type.
"""

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 32767

View File

@ -16,6 +16,7 @@ from PIL import Image
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
DocumentPromptMessageContent,
ImagePromptMessageContent,
PromptMessage,
PromptMessageContentType,
@ -35,6 +36,21 @@ from core.model_runtime.errors.invoke import (
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
# MIME types accepted for DOCUMENT prompt content. When a user message is
# converted, a document whose mime_type is not listed here is rejected with
# ValueError("Unsupported mime type ..."); listed types are forwarded as
# inline_data parts.
GOOGLE_AVAILABLE_MIMETYPE = [
"application/pdf",
"application/x-javascript",
"text/javascript",
"application/x-python",
"text/x-python",
"text/plain",
"text/html",
"text/css",
"text/md",
"text/csv",
"text/xml",
"text/rtf",
]
class GoogleLargeLanguageModel(LargeLanguageModel):
def _invoke(
@ -370,6 +386,12 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
blob = {"inline_data": {"mime_type": mime_type, "data": base64_data}}
glm_content["parts"].append(blob)
elif c.type == PromptMessageContentType.DOCUMENT:
message_content = cast(DocumentPromptMessageContent, c)
if message_content.mime_type not in GOOGLE_AVAILABLE_MIMETYPE:
raise ValueError(f"Unsupported mime type {message_content.mime_type}")
blob = {"inline_data": {"mime_type": message_content.mime_type, "data": message_content.data}}
glm_content["parts"].append(blob)
return glm_content
elif isinstance(message, AssistantPromptMessage):

View File

@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000

View File

@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000

View File

@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32768

View File

@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 8000

View File

@ -6,6 +6,7 @@ model_properties:
mode: chat
features:
- vision
- video
parameter_rules:
- name: temperature
use_template: temperature

View File

@ -26,9 +26,15 @@ class NoPromptFoundError(LLMNodeError):
"""Raised when no prompt is found in the LLM configuration."""
class NotSupportedPromptTypeError(LLMNodeError):
"""Raised when the prompt type is not supported."""
class TemplateTypeNotSupportError(LLMNodeError):
    """Raised when a prompt template is of a type that is not supported.

    Args:
        type_name: Textual name of the offending template type; it is
            embedded verbatim in the error message.
    """

    def __init__(self, *, type_name: str):
        message = f"Prompt type {type_name} is not supported."
        super().__init__(message)
class MemoryRolePrefixRequiredError(LLMNodeError):
"""Raised when memory role prefix is required for completion model."""
class FileTypeNotSupportError(LLMNodeError):
    """Raised when a prompt content item has a type the model does not support.

    Args:
        type_name: Name of the unsupported content type. Plain strings are
            used as-is; enum members (the LLM node passes the content item's
            ``type``) are reduced to their ``value`` so the message reads
            e.g. ``"image type is not supported by this model"``.
    """

    def __init__(self, *, type_name: str):
        # Formatting a (str, Enum) member in an f-string is Python-version
        # dependent: the raw value before 3.11, "ClassName.MEMBER" from 3.11.
        # Taking `.value` when present keeps the message stable; plain strings
        # have no `.value` attribute and fall through unchanged.
        label = getattr(type_name, "value", type_name)
        super().__init__(f"{label} type is not supported by this model")

View File

@ -65,6 +65,7 @@ from .entities import (
ModelConfig,
)
from .exc import (
FileTypeNotSupportError,
InvalidContextStructureError,
InvalidVariableTypeError,
LLMModeRequiredError,
@ -72,7 +73,7 @@ from .exc import (
MemoryRolePrefixRequiredError,
ModelNotExistError,
NoPromptFoundError,
NotSupportedPromptTypeError,
TemplateTypeNotSupportError,
VariableNotFoundError,
)
@ -621,9 +622,7 @@ class LLMNode(BaseNode[LLMNodeData]):
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
prompt_messages[0].content = prompt_content
else:
errmsg = f"Prompt type {type(prompt_template)} is not supported"
logger.warning(errmsg)
raise NotSupportedPromptTypeError(errmsg)
raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))
if vision_enabled and user_files:
file_prompts = []
@ -671,7 +670,7 @@ class LLMNode(BaseNode[LLMNodeData]):
and ModelFeature.AUDIO not in model_config.model_schema.features
)
):
continue
raise FileTypeNotSupportError(type_name=content_item.type)
prompt_message_content.append(content_item)
if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
prompt_message.content = prompt_message_content[0].data

View File

@ -400,59 +400,6 @@ def test_fetch_prompt_messages__basic(faker, llm_node, model_config):
)
},
),
LLMNodeTestScenario(
description="Prompt template with variable selector of File without vision feature",
user_query=fake_query,
user_files=[],
vision_enabled=True,
vision_detail=fake_vision_detail,
features=[],
window_size=fake_window_size,
prompt_template=[
LLMNodeChatModelMessage(
text="{{#input.image#}}",
role=PromptMessageRole.USER,
edition_type="basic",
),
],
expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
file_variables={
"input.image": File(
tenant_id="test",
type=FileType.IMAGE,
filename="test1.jpg",
transfer_method=FileTransferMethod.REMOTE_URL,
remote_url=fake_remote_url,
)
},
),
LLMNodeTestScenario(
description="Prompt template with variable selector of File with video file and vision feature",
user_query=fake_query,
user_files=[],
vision_enabled=True,
vision_detail=fake_vision_detail,
features=[ModelFeature.VISION],
window_size=fake_window_size,
prompt_template=[
LLMNodeChatModelMessage(
text="{{#input.image#}}",
role=PromptMessageRole.USER,
edition_type="basic",
),
],
expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
file_variables={
"input.image": File(
tenant_id="test",
type=FileType.VIDEO,
filename="test1.mp4",
transfer_method=FileTransferMethod.REMOTE_URL,
remote_url=fake_remote_url,
extension="mp4",
)
},
),
]
for scenario in test_scenarios:

View File

@ -12,34 +12,46 @@ import ConfigContext from '@/context/debug-configuration'
// import { Resolution } from '@/types/app'
import { useFeatures, useFeaturesStore } from '@/app/components/base/features/hooks'
import Switch from '@/app/components/base/switch'
import type { FileUpload } from '@/app/components/base/features/types'
import { SupportUploadFileTypes } from '@/app/components/workflow/types'
const ConfigVision: FC = () => {
const { t } = useTranslation()
const { isShowVisionConfig } = useContext(ConfigContext)
const { isShowVisionConfig, isAllowVideoUpload } = useContext(ConfigContext)
const file = useFeatures(s => s.features.file)
const featuresStore = useFeaturesStore()
const handleChange = useCallback((data: FileUpload) => {
const isImageEnabled = file?.allowed_file_types?.includes(SupportUploadFileTypes.image) ?? false
const handleChange = useCallback((value: boolean) => {
const {
features,
setFeatures,
} = featuresStore!.getState()
const newFeatures = produce(features, (draft) => {
draft.file = {
...draft.file,
enabled: data.enabled,
image: {
enabled: data.enabled,
detail: data.image?.detail,
transfer_methods: data.image?.transfer_methods,
number_limits: data.image?.number_limits,
},
if (value) {
draft.file!.allowed_file_types = Array.from(new Set([
...(draft.file?.allowed_file_types || []),
SupportUploadFileTypes.image,
...(isAllowVideoUpload ? [SupportUploadFileTypes.video] : []),
]))
}
else {
draft.file!.allowed_file_types = draft.file!.allowed_file_types?.filter(
type => type !== SupportUploadFileTypes.image && (isAllowVideoUpload ? type !== SupportUploadFileTypes.video : true),
)
}
if (draft.file) {
draft.file.enabled = (draft.file.allowed_file_types?.length ?? 0) > 0
draft.file.image = {
...(draft.file.image || {}),
enabled: value,
}
}
})
setFeatures(newFeatures)
}, [featuresStore])
}, [featuresStore, isAllowVideoUpload])
if (!isShowVisionConfig)
return null
@ -89,11 +101,8 @@ const ConfigVision: FC = () => {
<ParamConfig />
<div className='ml-1 mr-3 w-[1px] h-3.5 bg-divider-subtle'></div>
<Switch
defaultValue={file?.enabled}
onChange={value => handleChange({
...(file || {}),
enabled: value,
})}
defaultValue={isImageEnabled}
onChange={handleChange}
size='md'
/>
</div>

View File

@ -0,0 +1,78 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import produce from 'immer'
import { useContext } from 'use-context-selector'
import { Document } from '@/app/components/base/icons/src/vender/features'
import Tooltip from '@/app/components/base/tooltip'
import ConfigContext from '@/context/debug-configuration'
import { SupportUploadFileTypes } from '@/app/components/workflow/types'
import { useFeatures, useFeaturesStore } from '@/app/components/base/features/hooks'
import Switch from '@/app/components/base/switch'
/**
 * Toggle row that enables or disables document upload for the current app.
 *
 * The switch reflects whether `SupportUploadFileTypes.document` is present in
 * `features.file.allowed_file_types`. The row renders nothing unless the debug
 * configuration context reports `isShowDocumentConfig`.
 */
const ConfigDocument: FC = () => {
  const { t } = useTranslation()
  const file = useFeatures(s => s.features.file)
  const featuresStore = useFeaturesStore()
  const { isShowDocumentConfig } = useContext(ConfigContext)

  const isDocumentEnabled = file?.allowed_file_types?.includes(SupportUploadFileTypes.document) ?? false

  // Adds or removes the document type from the allowed upload types, then
  // re-derives `file.enabled` from whether any allowed type remains.
  const handleChange = useCallback((value: boolean) => {
    const {
      features,
      setFeatures,
    } = featuresStore!.getState()

    const newFeatures = produce(features, (draft) => {
      const fileConfig = draft.file
      // Guard before touching any property: the file feature may be unset,
      // in which case there is nothing to toggle and the state is left as-is
      // instead of throwing on a property access of undefined.
      if (!fileConfig)
        return

      if (value) {
        // A Set de-duplicates in case the document type is already listed.
        fileConfig.allowed_file_types = Array.from(new Set([
          ...(fileConfig.allowed_file_types || []),
          SupportUploadFileTypes.document,
        ]))
      }
      else {
        fileConfig.allowed_file_types = fileConfig.allowed_file_types?.filter(
          type => type !== SupportUploadFileTypes.document,
        )
      }

      fileConfig.enabled = (fileConfig.allowed_file_types?.length ?? 0) > 0
    })
    setFeatures(newFeatures)
  }, [featuresStore])

  if (!isShowDocumentConfig)
    return null

  return (
    <div className='mt-2 flex items-center gap-2 p-2 rounded-xl border-t-[0.5px] border-l-[0.5px] bg-background-section-burn'>
      <div className='shrink-0 p-1'>
        <div className='p-1 rounded-lg border-[0.5px] border-divider-subtle shadow-xs bg-util-colors-indigo-indigo-600'>
          <Document className='w-4 h-4 text-text-primary-on-surface' />
        </div>
      </div>
      <div className='grow flex items-center'>
        <div className='mr-1 text-text-secondary system-sm-semibold'>{t('appDebug.feature.documentUpload.title')}</div>
        <Tooltip
          popupContent={
            <div className='w-[180px]' >
              {t('appDebug.feature.documentUpload.description')}
            </div>
          }
        />
      </div>
      <div className='shrink-0 flex items-center'>
        <div className='ml-1 mr-3 w-[1px] h-3.5 bg-divider-subtle'></div>
        <Switch
          defaultValue={isDocumentEnabled}
          onChange={handleChange}
          size='md'
        />
      </div>
    </div>
  )
}
export default React.memo(ConfigDocument)

View File

@ -7,6 +7,7 @@ import { useFormattingChangedDispatcher } from '../debug/hooks'
import DatasetConfig from '../dataset-config'
import HistoryPanel from '../config-prompt/conversation-history/history-panel'
import ConfigVision from '../config-vision'
import ConfigDocument from './config-document'
import AgentTools from './agent/agent-tools'
import ConfigContext from '@/context/debug-configuration'
import ConfigPrompt from '@/app/components/app/configuration/config-prompt'
@ -82,6 +83,8 @@ const Config: FC = () => {
<ConfigVision />
<ConfigDocument />
{/* Chat History */}
{isAdvancedMode && isChatApp && modelModeType === ModelModeType.completion && (
<HistoryPanel

View File

@ -451,7 +451,8 @@ const Configuration: FC = () => {
}
const isShowVisionConfig = !!currModel?.features?.includes(ModelFeatureEnum.vision)
const isShowDocumentConfig = !!currModel?.features?.includes(ModelFeatureEnum.document)
const isAllowVideoUpload = !!currModel?.features?.includes(ModelFeatureEnum.video)
// *** web app features ***
const featuresData: FeaturesData = useMemo(() => {
return {
@ -472,7 +473,7 @@ const Configuration: FC = () => {
transfer_methods: modelConfig.file_upload?.image?.transfer_methods || ['local_file', 'remote_url'],
},
enabled: !!(modelConfig.file_upload?.enabled || modelConfig.file_upload?.image?.enabled),
allowed_file_types: modelConfig.file_upload?.allowed_file_types || [SupportUploadFileTypes.image, SupportUploadFileTypes.video],
allowed_file_types: modelConfig.file_upload?.allowed_file_types || [],
allowed_file_extensions: modelConfig.file_upload?.allowed_file_extensions || [...FILE_EXTS[SupportUploadFileTypes.image], ...FILE_EXTS[SupportUploadFileTypes.video]].map(ext => `.${ext}`),
allowed_file_upload_methods: modelConfig.file_upload?.allowed_file_upload_methods || modelConfig.file_upload?.image?.transfer_methods || ['local_file', 'remote_url'],
number_limits: modelConfig.file_upload?.number_limits || modelConfig.file_upload?.image?.number_limits || 3,
@ -861,6 +862,8 @@ const Configuration: FC = () => {
isShowVisionConfig,
visionConfig,
setVisionConfig: handleSetVisionConfig,
isAllowVideoUpload,
isShowDocumentConfig,
rerankSettingModalOpen,
setRerankSettingModalOpen,
}}

View File

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor">
<path d="M20 22H4C3.44772 22 3 21.5523 3 21V3C3 2.44772 3.44772 2 4 2H20C20.5523 2 21 2.44772 21 3V21C21 21.5523 20.5523 22 20 22ZM7 6V10H11V6H7ZM7 12V14H17V12H7ZM7 16V18H17V16H7ZM13 7V9H17V7H13Z"></path>
</svg>

After

Width:  |  Height:  |  Size: 292 B

View File

@ -0,0 +1,23 @@
{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"xmlns": "http://www.w3.org/2000/svg",
"viewBox": "0 0 24 24",
"fill": "currentColor"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"d": "M20 22H4C3.44772 22 3 21.5523 3 21V3C3 2.44772 3.44772 2 4 2H20C20.5523 2 21 2.44772 21 3V21C21 21.5523 20.5523 22 20 22ZM7 6V10H11V6H7ZM7 12V14H17V12H7ZM7 16V18H17V16H7ZM13 7V9H17V7H13Z"
},
"children": []
}
]
},
"name": "Document"
}

View File

@ -0,0 +1,16 @@
// GENERATE BY script
// DON NOT EDIT IT MANUALLY
import * as React from 'react'
import data from './Document.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
props,
ref,
) => <IconBase {...props} ref={ref} data={data as IconData} />)
Icon.displayName = 'Document'
export default Icon

View File

@ -7,3 +7,4 @@ export { default as Microphone01 } from './Microphone01'
export { default as TextToAudio } from './TextToAudio'
export { default as VirtualAssistant } from './VirtualAssistant'
export { default as Vision } from './Vision'
export { default as Document } from './Document'

View File

@ -52,6 +52,9 @@ export enum ModelFeatureEnum {
multiToolCall = 'multi-tool-call',
agentThought = 'agent-thought',
vision = 'vision',
video = 'video',
document = 'document',
audio = 'audio',
}
export enum ModelFeatureTextEnum {
@ -59,6 +62,9 @@ export enum ModelFeatureTextEnum {
multiToolCall = 'Multi Tool Call',
agentThought = 'Agent Thought',
vision = 'Vision',
video = 'Video',
document = 'Document',
audio = 'Audio',
}
export enum ModelStatusEnum {

View File

@ -97,6 +97,8 @@ type IDebugConfiguration = {
isShowVisionConfig: boolean
visionConfig: VisionSettings
setVisionConfig: (visionConfig: VisionSettings, noNotice?: boolean) => void
isAllowVideoUpload: boolean
isShowDocumentConfig: boolean
rerankSettingModalOpen: boolean
setRerankSettingModalOpen: (rerankSettingModalOpen: boolean) => void
}
@ -244,6 +246,8 @@ const DebugConfigurationContext = createContext<IDebugConfiguration>({
transfer_methods: [TransferMethod.remote_url],
},
setVisionConfig: () => { },
isAllowVideoUpload: false,
isShowDocumentConfig: false,
rerankSettingModalOpen: false,
setRerankSettingModalOpen: () => { },
})

View File

@ -218,6 +218,10 @@ const translation = {
enableText: 'Features Enabled',
manage: 'Manage',
},
documentUpload: {
  title: 'Document',
  description: 'Enabling Document allows the model to take in documents and answer questions about them.',
},
},
codegen: {
title: 'Code Generator',

View File

@ -218,6 +218,10 @@ const translation = {
enableText: '功能已开启',
manage: '管理',
},
documentUpload: {
title: '文档',
description: '启用文档后,模型可以接收文档并回答关于它们的问题。',
},
},
codegen: {
title: '代码生成器',