feat(block): Support find all regex extraction for ExtractTextInformationBlock (#8934)

ExtractTextInformationBlock is only supporting extracting one match.

### Changes 🏗️

Adding find_all option to ExtractTextInformationBlock.

### Checklist 📋

#### For code changes:
- [ ] I have clearly listed my changes in the PR description
- [ ] I have made a test plan
- [ ] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [ ] ...

<details>
  <summary>Example test plan</summary>
  
  - [ ] Create from scratch and execute an agent with at least 3 blocks
- [ ] Import an agent from file upload, and confirm it executes
correctly
  - [ ] Upload agent to marketplace
- [ ] Import an agent from marketplace and confirm it executes correctly
  - [ ] Edit an agent from monitor, and confirm it executes correctly
</details>

#### For configuration changes:
- [ ] `.env.example` is updated or already compatible with my changes
- [ ] `docker-compose.yml` is updated or already compatible with my
changes
- [ ] I have included a list of my configuration changes in the PR
description (under **Changes**)

<details>
  <summary>Examples of configuration changes</summary>

  - Changing ports
  - Adding new services that need to communicate with each other
  - Secrets or environment variable changes
  - New or infrastructure changes such as databases
</details>
This commit is contained in:
Zamil Majdy 2024-12-10 15:56:06 -06:00 committed by GitHub
parent 984d42234c
commit d827d4f9e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 29 additions and 7 deletions

View File

@ -71,6 +71,7 @@ class ExtractTextInformationBlock(Block):
description="Case sensitive match", default=True
)
dot_all: bool = SchemaField(description="Dot matches all", default=True)
find_all: bool = SchemaField(description="Find all matches", default=False)
class Output(BlockSchema):
positive: str = SchemaField(description="Extracted text")
@ -88,12 +89,27 @@ class ExtractTextInformationBlock(Block):
{"text": "Hello, World!", "pattern": "Hello, (.+)", "group": 0},
{"text": "Hello, World!", "pattern": "Hello, (.+)", "group": 2},
{"text": "Hello, World!", "pattern": "hello,", "case_sensitive": False},
{
"text": "Hello, World!! Hello, Earth!!",
"pattern": "Hello, (\\S+)",
"group": 1,
"find_all": False,
},
{
"text": "Hello, World!! Hello, Earth!!",
"pattern": "Hello, (\\S+)",
"group": 1,
"find_all": True,
},
],
test_output=[
("positive", "World!"),
("positive", "Hello, World!"),
("negative", "Hello, World!"),
("positive", "Hello,"),
("positive", "World!!"),
("positive", "World!!"),
("positive", "Earth!!"),
],
)
@ -105,15 +121,21 @@ class ExtractTextInformationBlock(Block):
flags = flags | re.DOTALL
if isinstance(input_data.text, str):
text = input_data.text
txt = input_data.text
else:
text = json.dumps(input_data.text)
txt = json.dumps(input_data.text)
match = re.search(input_data.pattern, text, flags)
if match and input_data.group <= len(match.groups()):
yield "positive", match.group(input_data.group)
else:
yield "negative", text
matches = [
match.group(input_data.group)
for match in re.finditer(input_data.pattern, txt, flags)
if input_data.group <= len(match.groups())
]
for match in matches:
yield "positive", match
if not input_data.find_all:
return
if not matches:
yield "negative", input_data.text
class FillTextTemplateBlock(Block):