feat(block): Support find all regex extraction for ExtractTextInformationBlock (#8934)

ExtractTextInformationBlock currently supports extracting only a single regex match. ### Changes 🏗️ Add a `find_all` option (default `False`) to ExtractTextInformationBlock. When it is enabled, every match of the pattern is emitted as a separate `positive` output; when it is disabled, only the first match is emitted, as before. ### Checklist 📋 #### For code changes: - [ ] I have clearly listed my changes in the PR description - [ ] I have made a test plan - [ ] I have tested my changes according to the test plan:  - [ ] ... <details> <summary>Example test plan</summary> - [ ] Create from scratch and execute an agent with at least 3 blocks - [ ] Import an agent from file upload, and confirm it executes correctly - [ ] Upload agent to marketplace - [ ] Import an agent from marketplace and confirm it executes correctly - [ ] Edit an agent from monitor, and confirm it executes correctly </details> #### For configuration changes: - [ ] `.env.example` is updated or already compatible with my changes - [ ] `docker-compose.yml` is updated or already compatible with my changes - [ ] I have included a list of my configuration changes in the PR description (under **Changes**) <details> <summary>Examples of configuration changes</summary> - Changing ports - Adding new services that need to communicate with each other - Secrets or environment variable changes - New or changed infrastructure such as databases </details>
2024-12-10 15:56:06 -06:00 · 2024-12-10 15:56:06 -06:00 · d827d4f9e4
parent 984d42234c
commit d827d4f9e4
1 changed files with 29 additions and 7 deletions
--- a/autogpt_platform/backend/backend/blocks/text.py
+++ b/autogpt_platform/backend/backend/blocks/text.py
@ -71,6 +71,7 @@ class ExtractTextInformationBlock(Block):
            description="Case sensitive match", default=True
        )
        dot_all: bool = SchemaField(description="Dot matches all", default=True)
+        find_all: bool = SchemaField(description="Find all matches", default=False)

    class Output(BlockSchema):
        positive: str = SchemaField(description="Extracted text")
@ -88,12 +89,27 @@ class ExtractTextInformationBlock(Block):
                {"text": "Hello, World!", "pattern": "Hello, (.+)", "group": 0},
                {"text": "Hello, World!", "pattern": "Hello, (.+)", "group": 2},
                {"text": "Hello, World!", "pattern": "hello,", "case_sensitive": False},
+                {
+                    "text": "Hello, World!! Hello, Earth!!",
+                    "pattern": "Hello, (\\S+)",
+                    "group": 1,
+                    "find_all": False,
+                },
+                {
+                    "text": "Hello, World!! Hello, Earth!!",
+                    "pattern": "Hello, (\\S+)",
+                    "group": 1,
+                    "find_all": True,
+                },
            ],
            test_output=[
                ("positive", "World!"),
                ("positive", "Hello, World!"),
                ("negative", "Hello, World!"),
                ("positive", "Hello,"),
+                ("positive", "World!!"),
+                ("positive", "World!!"),
+                ("positive", "Earth!!"),
            ],
        )

@ -105,15 +121,21 @@ class ExtractTextInformationBlock(Block):
            flags = flags | re.DOTALL

        if isinstance(input_data.text, str):
-            text = input_data.text
+            txt = input_data.text
        else:
-            text = json.dumps(input_data.text)
+            txt = json.dumps(input_data.text)

-        match = re.search(input_data.pattern, text, flags)
-        if match and input_data.group <= len(match.groups()):
-            yield "positive", match.group(input_data.group)
-        else:
-            yield "negative", text
+        matches = [
+            match.group(input_data.group)
+            for match in re.finditer(input_data.pattern, txt, flags)
+            if input_data.group <= len(match.groups())
+        ]
+        for match in matches:
+            yield "positive", match
+            if not input_data.find_all:
+                return
+        if not matches:
+            yield "negative", input_data.text


 class FillTextTemplateBlock(Block):