-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Assistant agent drop images when not provided with a vision-capable model. (#5351)
Allow AssistantAgent to drop images when not equipped with a multi-modal model. Adds a corresponding utility function, which can be used in autogen-ext and teams, to accomplish the same.
- Loading branch information
Showing
5 changed files
with
130 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
python/packages/autogen-agentchat/src/autogen_agentchat/utils/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
""" | ||
This module implements various utilities common to AgentChat agents and teams. | ||
""" | ||
|
||
from ._utils import remove_images | ||
|
||
__all__ = ["remove_images"] |
32 changes: 32 additions & 0 deletions
32
python/packages/autogen-agentchat/src/autogen_agentchat/utils/_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from typing import List | ||
|
||
from autogen_core import Image | ||
from autogen_core.models import LLMMessage, UserMessage | ||
|
||
|
||
def _image_content_to_str(content: str | List[str | Image]) -> str:
    """Convert the content of an LLMMessage to a string.

    Args:
        content: Either a plain string, or a list whose items are strings
            and/or ``Image`` objects.

    Returns:
        ``content`` unchanged when it is already a string; otherwise the list
        items joined with newlines, with every ``Image`` replaced by the
        literal placeholder ``"<image>"``. An empty list yields ``""``.

    Raises:
        AssertionError: If a list item is neither a ``str`` nor an ``Image``.
    """
    if isinstance(content, str):
        return content

    result: List[str] = []
    for c in content:
        if isinstance(c, str):
            result.append(c)
        elif isinstance(c, Image):
            # Keep a marker so the text still shows where an image used to be.
            result.append("<image>")
        else:
            raise AssertionError("Received unexpected content type.")

    return "\n".join(result)
|
||
|
||
def remove_images(messages: List[LLMMessage]) -> List[LLMMessage]:
    """Remove images from a list of LLMMessages.

    Args:
        messages: The messages to process. The list itself is not modified.

    Returns:
        A new list of the same length and order. Every ``UserMessage`` whose
        content is a list is replaced by a new ``UserMessage`` with the same
        ``source`` and a plain-string content (see ``_image_content_to_str``).
        All other messages are placed in the result as the same objects.
    """
    str_messages: List[LLMMessage] = []
    for message in messages:
        if isinstance(message, UserMessage) and isinstance(message.content, list):
            # Only list-valued user content is rebuilt; it is flattened to a string.
            str_messages.append(UserMessage(content=_image_content_to_str(message.content), source=message.source))
        else:
            str_messages.append(message)
    return str_messages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from typing import List | ||
|
||
import pytest | ||
from autogen_agentchat.utils import remove_images | ||
from autogen_core import Image | ||
from autogen_core.models import AssistantMessage, LLMMessage, SystemMessage, UserMessage | ||
|
||
|
||
@pytest.mark.asyncio
async def test_remove_images() -> None:
    """Check that remove_images flattens image-bearing user messages only.

    Builds one message of each kind, with a single UserMessage carrying an
    image, and verifies that message count, order, and types are preserved,
    that messages without list content keep their content and source, and
    that the image is replaced by the ``<image>`` placeholder.
    """
    # Base64 payload used to construct the Image fixture.
    img_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4//8/AAX+Av4N70a4AAAAAElFTkSuQmCC"
    messages: List[LLMMessage] = [
        SystemMessage(content="System.1"),
        UserMessage(content=["User.1", Image.from_base64(img_base64)], source="user.1"),
        AssistantMessage(content="Assistant.1", source="assistant.1"),
        UserMessage(content="User.2", source="assistant.2"),
    ]

    result = remove_images(messages)

    # Check all the invariants
    assert len(result) == 4
    assert isinstance(result[0], SystemMessage)
    assert isinstance(result[1], UserMessage)
    assert isinstance(result[2], AssistantMessage)
    assert isinstance(result[3], UserMessage)
    assert result[0].content == messages[0].content
    assert result[2].content == messages[2].content
    assert result[3].content == messages[3].content
    # Narrow the input types before reading `.source` on them.
    assert isinstance(messages[2], AssistantMessage)
    assert isinstance(messages[3], UserMessage)
    assert result[2].source == messages[2].source
    assert result[3].source == messages[3].source

    # Check that the image was removed.
    assert result[1].content == "User.1\n<image>"
    # The converted message is the only one rebuilt, so verify its source survived.
    assert result[1].source == "user.1"