blocks(exa): Add more Exa blocks (#9097)
Revamp the Exa search block and add two more blocks for content retrieval and similarity search. ### Changes 🏗️ - Renamed the Exa search block inputs from camelCase to snake_case - Marked non-required fields as advanced - Pulled the content settings into helpers for reuse across blocks - Updated customnode.css to handle long inputs, especially the date input ### Checklist 📋 #### For code changes: - [ ] I have clearly listed my changes in the PR description - [ ] I have made a test plan - [ ] I have tested my changes according to the test plan: <!-- Put your test plan here: --> - [ ] ...
This commit is contained in:
parent
a8339d0748
commit
54f8d3b4dd
|
@ -0,0 +1,87 @@
|
|||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.blocks.exa._auth import (
|
||||
ExaCredentials,
|
||||
ExaCredentialsField,
|
||||
ExaCredentialsInput,
|
||||
)
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.request import requests
|
||||
|
||||
|
||||
class ContentRetrievalSettings(BaseModel):
    # Content options for an Exa contents request.
    #
    # Each field is a free-form dict (or None). ExaContentsBlock.run places the
    # three values, unmodified, under the payload keys "text", "highlights"
    # and "summary", so the dict keys here are already in the request's
    # camelCase spelling.

    # Options controlling the returned page text.
    text: Optional[dict] = SchemaField(
        advanced=True,
        default={"maxCharacters": 1000, "includeHtmlTags": False},
        description="Text content settings",
    )

    # Options controlling the returned highlights.
    highlights: Optional[dict] = SchemaField(
        advanced=True,
        default={
            "numSentences": 3,
            "highlightsPerUrl": 3,
            "query": "",
        },
        description="Highlight settings",
    )

    # Options controlling the returned summary.
    summary: Optional[dict] = SchemaField(
        advanced=True,
        default={"query": ""},
        description="Summary settings",
    )
|
||||
|
||||
|
||||
class ExaContentsBlock(Block):
    # Block that posts a list of document IDs to Exa's contents endpoint and
    # emits the "results" entry of the JSON response.

    class Input(BlockSchema):
        credentials: ExaCredentialsInput = ExaCredentialsField()
        # IDs of the documents whose contents should be fetched.
        ids: List[str] = SchemaField(
            description="Array of document IDs obtained from searches",
        )
        # Text / highlight / summary options forwarded with the request.
        contents: ContentRetrievalSettings = SchemaField(
            advanced=True,
            default=ContentRetrievalSettings(),
            description="Content retrieval settings",
        )

    class Output(BlockSchema):
        results: list = SchemaField(
            default=[],
            description="List of document contents",
        )

    def __init__(self):
        """Register the block's id, description, category and schemas."""
        super().__init__(
            id="c52be83f-f8cd-4180-b243-af35f986b461",
            description="Retrieves document contents using Exa's contents API",
            categories={BlockCategory.SEARCH},
            input_schema=ExaContentsBlock.Input,
            output_schema=ExaContentsBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: ExaCredentials, **kwargs
    ) -> BlockOutput:
        """POST the requested IDs to the contents endpoint and yield the results.

        Yields:
            ("results", <list>) taken from the response's "results" key
            (empty list when the key is absent). If anything in the request
            or decoding raises, yields ("error", <message>) followed by
            ("results", []).
        """
        endpoint = "https://api.exa.ai/contents"
        request_headers = {
            "Content-Type": "application/json",
            "x-api-key": credentials.api_key.get_secret_value(),
        }

        settings = input_data.contents
        body = {
            "ids": input_data.ids,
            "text": settings.text,
            "highlights": settings.highlights,
            "summary": settings.summary,
        }

        try:
            reply = requests.post(endpoint, headers=request_headers, json=body)
            reply.raise_for_status()
            decoded = reply.json()
            yield "results", decoded.get("results", [])
        except Exception as exc:
            # Report the failure, then still emit an empty result list.
            yield "error", str(exc)
            yield "results", []
|
|
@ -0,0 +1,54 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.model import SchemaField
|
||||
|
||||
|
||||
class TextSettings(BaseModel):
    # Options for the text portion of retrieved content.

    # Upper bound on the number of characters returned (defaults to 1000).
    max_characters: int = SchemaField(
        description="Maximum number of characters to return",
        placeholder="1000",
        default=1000,
    )

    # Whether HTML tags are kept in the returned text (off by default).
    include_html_tags: bool = SchemaField(
        description="Whether to include HTML tags in the text",
        placeholder="False",
        default=False,
    )
|
||||
|
||||
|
||||
class HighlightSettings(BaseModel):
    # Options for the highlights portion of retrieved content.

    # Sentences in each highlight (defaults to 3).
    num_sentences: int = SchemaField(
        description="Number of sentences per highlight",
        placeholder="3",
        default=3,
    )

    # Highlights returned for each URL (defaults to 3).
    highlights_per_url: int = SchemaField(
        description="Number of highlights per URL",
        placeholder="3",
        default=3,
    )
|
||||
|
||||
|
||||
class SummarySettings(BaseModel):
    # Options for the summary portion of retrieved content.

    # Query used for summarization; defaults to the empty string.
    query: Optional[str] = SchemaField(
        description="Query string for summarization",
        placeholder="Enter query",
        default="",
    )
|
||||
|
||||
|
||||
class ContentSettings(BaseModel):
    # Bundle of text, highlight and summary options, each defaulting to a
    # freshly constructed settings model.
    #
    # NOTE(review): the nested field names are snake_case, whereas request
    # payload keys elsewhere in this package are camelCase (e.g.
    # "includeDomains"). Anything serializing this model into a request needs
    # to translate the key names — verify at each call site.

    text: TextSettings = SchemaField(
        description="Text content settings",
        default=TextSettings(),
    )

    highlights: HighlightSettings = SchemaField(
        description="Highlight settings",
        default=HighlightSettings(),
    )

    summary: SummarySettings = SchemaField(
        description="Summary settings",
        default=SummarySettings(),
    )
|
|
@ -1,84 +1,76 @@
|
|||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.blocks.exa._auth import (
|
||||
ExaCredentials,
|
||||
ExaCredentialsField,
|
||||
ExaCredentialsInput,
|
||||
)
|
||||
from backend.blocks.exa.helpers import ContentSettings
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.request import requests
|
||||
|
||||
|
||||
class ContentSettings(BaseModel):
    # Dict-based content options with camelCase keys in the default values.
    #
    # NOTE(review): this file's import block also imports a ContentSettings
    # from backend.blocks.exa.helpers; a local class of the same name would
    # shadow that import — confirm which definition is intended to remain.

    text: dict = SchemaField(
        default={"maxCharacters": 1000, "includeHtmlTags": False},
        description="Text content settings",
    )

    highlights: dict = SchemaField(
        default={"numSentences": 3, "highlightsPerUrl": 3},
        description="Highlight settings",
    )

    summary: dict = SchemaField(
        default={"query": ""},
        description="Summary settings",
    )
|
||||
|
||||
|
||||
class ExaSearchBlock(Block):
|
||||
class Input(BlockSchema):
|
||||
credentials: ExaCredentialsInput = ExaCredentialsField()
|
||||
query: str = SchemaField(description="The search query")
|
||||
useAutoprompt: bool = SchemaField(
|
||||
use_auto_prompt: bool = SchemaField(
|
||||
description="Whether to use autoprompt",
|
||||
default=True,
|
||||
advanced=True,
|
||||
)
|
||||
type: str = SchemaField(
|
||||
description="Type of search",
|
||||
default="",
|
||||
advanced=True,
|
||||
)
|
||||
category: str = SchemaField(
|
||||
description="Category to search within",
|
||||
default="",
|
||||
advanced=True,
|
||||
)
|
||||
numResults: int = SchemaField(
|
||||
number_of_results: int = SchemaField(
|
||||
description="Number of results to return",
|
||||
default=10,
|
||||
advanced=True,
|
||||
)
|
||||
includeDomains: List[str] = SchemaField(
|
||||
include_domains: List[str] = SchemaField(
|
||||
description="Domains to include in search",
|
||||
default=[],
|
||||
)
|
||||
excludeDomains: List[str] = SchemaField(
|
||||
exclude_domains: List[str] = SchemaField(
|
||||
description="Domains to exclude from search",
|
||||
default=[],
|
||||
advanced=True,
|
||||
)
|
||||
startCrawlDate: datetime = SchemaField(
|
||||
start_crawl_date: datetime = SchemaField(
|
||||
description="Start date for crawled content",
|
||||
)
|
||||
endCrawlDate: datetime = SchemaField(
|
||||
end_crawl_date: datetime = SchemaField(
|
||||
description="End date for crawled content",
|
||||
)
|
||||
startPublishedDate: datetime = SchemaField(
|
||||
start_published_date: datetime = SchemaField(
|
||||
description="Start date for published content",
|
||||
)
|
||||
endPublishedDate: datetime = SchemaField(
|
||||
end_published_date: datetime = SchemaField(
|
||||
description="End date for published content",
|
||||
)
|
||||
includeText: List[str] = SchemaField(
|
||||
include_text: List[str] = SchemaField(
|
||||
description="Text patterns to include",
|
||||
default=[],
|
||||
advanced=True,
|
||||
)
|
||||
excludeText: List[str] = SchemaField(
|
||||
exclude_text: List[str] = SchemaField(
|
||||
description="Text patterns to exclude",
|
||||
default=[],
|
||||
advanced=True,
|
||||
)
|
||||
contents: ContentSettings = SchemaField(
|
||||
description="Content retrieval settings",
|
||||
default=ContentSettings(),
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
|
@ -107,44 +99,38 @@ class ExaSearchBlock(Block):
|
|||
|
||||
payload = {
|
||||
"query": input_data.query,
|
||||
"useAutoprompt": input_data.useAutoprompt,
|
||||
"numResults": input_data.numResults,
|
||||
"contents": {
|
||||
"text": {"maxCharacters": 1000, "includeHtmlTags": False},
|
||||
"highlights": {
|
||||
"numSentences": 3,
|
||||
"highlightsPerUrl": 3,
|
||||
},
|
||||
"summary": {"query": ""},
|
||||
},
|
||||
"useAutoprompt": input_data.use_auto_prompt,
|
||||
"numResults": input_data.number_of_results,
|
||||
"contents": input_data.contents.dict(),
|
||||
}
|
||||
|
||||
date_field_mapping = {
|
||||
"start_crawl_date": "startCrawlDate",
|
||||
"end_crawl_date": "endCrawlDate",
|
||||
"start_published_date": "startPublishedDate",
|
||||
"end_published_date": "endPublishedDate",
|
||||
}
|
||||
|
||||
# Add dates if they exist
|
||||
date_fields = [
|
||||
"startCrawlDate",
|
||||
"endCrawlDate",
|
||||
"startPublishedDate",
|
||||
"endPublishedDate",
|
||||
]
|
||||
for field in date_fields:
|
||||
value = getattr(input_data, field, None)
|
||||
for input_field, api_field in date_field_mapping.items():
|
||||
value = getattr(input_data, input_field, None)
|
||||
if value:
|
||||
payload[field] = value.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||
payload[api_field] = value.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||
|
||||
optional_field_mapping = {
|
||||
"type": "type",
|
||||
"category": "category",
|
||||
"include_domains": "includeDomains",
|
||||
"exclude_domains": "excludeDomains",
|
||||
"include_text": "includeText",
|
||||
"exclude_text": "excludeText",
|
||||
}
|
||||
|
||||
# Add other fields
|
||||
optional_fields = [
|
||||
"type",
|
||||
"category",
|
||||
"includeDomains",
|
||||
"excludeDomains",
|
||||
"includeText",
|
||||
"excludeText",
|
||||
]
|
||||
|
||||
for field in optional_fields:
|
||||
value = getattr(input_data, field)
|
||||
for input_field, api_field in optional_field_mapping.items():
|
||||
value = getattr(input_data, input_field)
|
||||
if value: # Only add non-empty values
|
||||
payload[field] = value
|
||||
payload[api_field] = value
|
||||
|
||||
try:
|
||||
response = requests.post(url, headers=headers, json=payload)
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
from datetime import datetime
|
||||
from typing import Any, List
|
||||
|
||||
from backend.blocks.exa._auth import (
|
||||
ExaCredentials,
|
||||
ExaCredentialsField,
|
||||
ExaCredentialsInput,
|
||||
)
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.request import requests
|
||||
|
||||
from .helpers import ContentSettings
|
||||
|
||||
|
||||
class ExaFindSimilarBlock(Block):
    # Block that posts a URL to Exa's findSimilar endpoint and emits the
    # "results" entry of the JSON response.

    class Input(BlockSchema):
        # Field names are snake_case; run() maps them onto the camelCase keys
        # used in the request payload.
        credentials: ExaCredentialsInput = ExaCredentialsField()
        url: str = SchemaField(
            description="The url for which you would like to find similar links"
        )
        number_of_results: int = SchemaField(
            description="Number of results to return",
            default=10,
            advanced=True,
        )
        include_domains: List[str] = SchemaField(
            description="Domains to include in search",
            default=[],
            advanced=True,
        )
        exclude_domains: List[str] = SchemaField(
            description="Domains to exclude from search",
            default=[],
            advanced=True,
        )
        # NOTE(review): the four date fields declare no default, yet run()
        # treats them as optional (falsy values are skipped) — confirm whether
        # they are really meant to be required inputs.
        start_crawl_date: datetime = SchemaField(
            description="Start date for crawled content",
        )
        end_crawl_date: datetime = SchemaField(
            description="End date for crawled content",
        )
        start_published_date: datetime = SchemaField(
            description="Start date for published content",
        )
        end_published_date: datetime = SchemaField(
            description="End date for published content",
        )
        include_text: List[str] = SchemaField(
            description="Text patterns to include (max 1 string, up to 5 words)",
            default=[],
            advanced=True,
        )
        exclude_text: List[str] = SchemaField(
            description="Text patterns to exclude (max 1 string, up to 5 words)",
            default=[],
            advanced=True,
        )
        contents: ContentSettings = SchemaField(
            description="Content retrieval settings",
            default=ContentSettings(),
            advanced=True,
        )

    class Output(BlockSchema):
        results: List[Any] = SchemaField(
            description="List of similar documents with title, URL, published date, author, and score",
            default=[],
        )

    def __init__(self):
        """Register the block's id, description, category and schemas."""
        super().__init__(
            id="5e7315d1-af61-4a0c-9350-7c868fa7438a",
            description="Finds similar links using Exa's findSimilar API",
            categories={BlockCategory.SEARCH},
            input_schema=ExaFindSimilarBlock.Input,
            output_schema=ExaFindSimilarBlock.Output,
        )

    @staticmethod
    def _snake_to_camel(name: str) -> str:
        """Return *name* with snake_case segments joined as camelCase."""
        head, *tail = name.split("_")
        return head + "".join(part[:1].upper() + part[1:] for part in tail)

    @staticmethod
    def _camel_case_keys(value: Any) -> Any:
        """Recursively rename string dict keys from snake_case to camelCase.

        Dicts and lists are rebuilt; every other value is returned unchanged.
        """
        if isinstance(value, dict):
            return {
                (
                    ExaFindSimilarBlock._snake_to_camel(key)
                    if isinstance(key, str)
                    else key
                ): ExaFindSimilarBlock._camel_case_keys(item)
                for key, item in value.items()
            }
        if isinstance(value, list):
            return [ExaFindSimilarBlock._camel_case_keys(item) for item in value]
        return value

    def run(
        self, input_data: Input, *, credentials: ExaCredentials, **kwargs
    ) -> BlockOutput:
        """POST the similarity query to Exa and yield the returned documents.

        Args:
            input_data: Validated block inputs (see ``Input``).
            credentials: Exa credentials; the API key is sent in ``x-api-key``.

        Yields:
            ("results", <list>) taken from the response's "results" key
            (empty list when the key is absent). If anything in the request
            or decoding raises, yields ("error", <message>) followed by
            ("results", []).
        """
        url = "https://api.exa.ai/findSimilar"
        headers = {
            "Content-Type": "application/json",
            "x-api-key": credentials.api_key.get_secret_value(),
        }

        payload = {
            "url": input_data.url,
            "numResults": input_data.number_of_results,
            # The content settings model uses snake_case field names, while
            # every other key in this payload is camelCase. Convert the nested
            # keys so the whole request uses one spelling.
            "contents": self._camel_case_keys(input_data.contents.dict()),
        }

        optional_field_mapping = {
            "include_domains": "includeDomains",
            "exclude_domains": "excludeDomains",
            "include_text": "includeText",
            "exclude_text": "excludeText",
        }

        # Add optional fields only when they hold a non-empty value.
        for input_field, api_field in optional_field_mapping.items():
            value = getattr(input_data, input_field)
            if value:
                payload[api_field] = value

        date_field_mapping = {
            "start_crawl_date": "startCrawlDate",
            "end_crawl_date": "endCrawlDate",
            "start_published_date": "startPublishedDate",
            "end_published_date": "endPublishedDate",
        }

        # Add dates if they exist, formatted with a literal ".000Z" suffix.
        # NOTE(review): the suffix is appended without any timezone
        # conversion — confirm the inputs are already in UTC.
        for input_field, api_field in date_field_mapping.items():
            value = getattr(input_data, input_field, None)
            if value:
                payload[api_field] = value.strftime("%Y-%m-%dT%H:%M:%S.000Z")

        try:
            response = requests.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
            yield "results", data.get("results", [])
        except Exception as e:
            # Report the failure, then still emit an empty result list.
            yield "error", str(e)
            yield "results", []
|
|
@ -4,6 +4,74 @@
|
|||
transition: border-color 0.3s ease-in-out;
|
||||
}
|
||||
|
||||
/* Side padding and bottom spacing for a node's input-handle area. */
.custom-node [data-id="input-handles"] {
  margin-bottom: 1rem;
  padding: 0 1.25rem;
}

/* Bottom spacing for each second-level div inside the input-handle area. */
.custom-node [data-id="input-handles"] > div > div {
  margin-bottom: 1rem;
}
|
||||
|
||||
/*
 * Flex row for a handle, padded and at least 44px tall.
 * NOTE(review): another .handle-container rule follows later in this
 * stylesheet; for any property both declare, the later one wins.
 */
.handle-container {
  position: relative;
  display: flex;
  height: 100%;
  min-height: 44px;
  padding: 0.75rem 1.25rem;
  margin-bottom: 0px;
}
|
||||
|
||||
/*
 * Size text-like controls to the node width minus the 1.25rem side margins,
 * capped at 400px so long values do not stretch the node. Checkboxes are
 * excluded.
 */
.custom-node input:not([type="checkbox"]),
.custom-node textarea,
.custom-node select {
  margin: 0.5rem 1.25rem;
  max-width: 400px;
  width: calc(100% - 2.5rem);
}
|
||||
|
||||
/*
 * Date pickers, list containers, add-item controls, array rows and the
 * content-settings group all share the same side margins and a width equal
 * to the node width minus those margins.
 */
.custom-node [data-id^="date-picker"],
.custom-node [data-list-container],
.custom-node [data-add-item],
.array-item-container,
.custom-node [data-content-settings] {
  margin: 0.5rem 1.25rem;
  width: calc(100% - 2.5rem);
}

/* Add-item controls additionally get inner padding. */
.custom-node [data-add-item] {
  padding: 0.5rem;
}

/* Array rows lay their children out in a vertically centred flex row. */
.array-item-container {
  display: flex;
  align-items: center;
}
|
||||
|
||||
/* Switch row: children pushed to opposite ends, vertically centred. */
.custom-node .custom-switch {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 0.5rem 1.25rem;
}

/* Small red validation text aligned with the inputs' side margin. */
.error-message {
  margin: 0.25rem 1.25rem;
  padding-left: 0.5rem;
  font-size: 13px;
  color: #d9534f;
}
|
||||
|
||||
/* Existing styles */
|
||||
.handle-container {
|
||||
display: flex;
|
||||
|
|
Loading…
Reference in New Issue