crewAIInc · ProCO-RPHopkins · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py
@@ -1528,22 +1528,49 @@ def kickoff(
         Returns:
             LiteAgentOutput: The result of the agent execution.
         """
+        # Start from the agent's configured tools (if any),
+        # but avoid mutating self.tools so repeated calls don't accumulate tools.
+        tools_for_run = list(self.tools or [])
+
+        # Add platform app tools (if configured)
         if self.apps:
             platform_tools = self.get_platform_tools(self.apps)
-            if platform_tools and self.tools is not None:
-                self.tools.extend(platform_tools)
+            if platform_tools:
+                tools_for_run.extend(platform_tools)
+
+        # Add MCP tools (if configured)
         if self.mcps:
             mcps = self.get_mcp_tools(self.mcps)
-            if mcps and self.tools is not None:
-                self.tools.extend(mcps)
+            if mcps:
+                tools_for_run.extend(mcps)
+
+        # Add multimodal tools (e.g. AddImageTool) when multimodal=True,
+        # without duplicating tool *types* across runs.
+        if getattr(self, "multimodal", False):
+            existing_types = {type(t) for t in tools_for_run}
+
+            if hasattr(self, "get_multimodal_tools"):
+                multimodal_tools = self.get_multimodal_tools() or []
+            else:
+                try:
+                    from crewai.tools.agent_tools.add_image_tool import AddImageTool
+
+                    multimodal_tools = [AddImageTool()]
+                except ImportError:
+                    multimodal_tools = []
+
+            for tool in multimodal_tools:
+                if type(tool) not in existing_types:
+                    tools_for_run.append(tool)
+                    existing_types.add(type(tool))
 
         lite_agent = LiteAgent(
             id=self.id,
             role=self.role,
             goal=self.goal,
             backstory=self.backstory,
             llm=self.llm,
-            tools=self.tools or [],
+            tools=tools_for_run,
             max_iterations=self.max_iter,
             max_execution_time=self.max_execution_time,
             respect_context_window=self.respect_context_window,
@@ -1576,12 +1603,48 @@ async def kickoff_async(
         Returns:
             LiteAgentOutput: The result of the agent execution.
         """
+        # Mirror the tooling behaviour from the synchronous kickoff so that
+        # async kickoff has the same platform/MCP/multimodal tools available.
+        tools_for_run = list(self.tools or [])
+
+        # Add platform app tools (if configured)
+        if self.apps:
+            platform_tools = self.get_platform_tools(self.apps)
+            if platform_tools:
+                tools_for_run.extend(platform_tools)
+
+        # Add MCP tools (if configured)
+        if self.mcps:
+            mcps = self.get_mcp_tools(self.mcps)
+            if mcps:
+                tools_for_run.extend(mcps)
+
+        # Add multimodal tools (e.g. AddImageTool) when multimodal=True,
+        # without duplicating tool *types* across runs.
+        if getattr(self, "multimodal", False):
+            existing_types = {type(t) for t in tools_for_run}
+
+            if hasattr(self, "get_multimodal_tools"):
+                multimodal_tools = self.get_multimodal_tools() or []
+            else:
+                try:
+                    from crewai.tools.agent_tools.add_image_tool import AddImageTool
+
+                    multimodal_tools = [AddImageTool()]
+                except ImportError:
+                    multimodal_tools = []
+
+            for tool in multimodal_tools:
+                if type(tool) not in existing_types:
+                    tools_for_run.append(tool)
+                    existing_types.add(type(tool))
+
         lite_agent = LiteAgent(
             role=self.role,
             goal=self.goal,
             backstory=self.backstory,
             llm=self.llm,
-            tools=self.tools or [],
+            tools=tools_for_run,
             max_iterations=self.max_iter,
             max_execution_time=self.max_execution_time,
             respect_context_window=self.respect_context_window,

diff --git a/lib/crewai/tests/agents/test_agent_kickoff_multimodal.py b/lib/crewai/tests/agents/test_agent_kickoff_multimodal.py
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from crewai.agent.core import Agent
+from crewai.tools.agent_tools.add_image_tool import AddImageTool
+from crewai.tools.base_tool import BaseTool
+
+
+class _SpyLiteAgent:
+    """LiteAgent stand-in that just records the tools passed in."""
+
+    def __init__(
+        self,
+        *args: Any,
+        tools: list[Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        self.tools = list(tools or [])
+
+    def kickoff(self, messages: Any) -> dict[str, Any]:
+        # We don't care about LLM behaviour here, only tool wiring.
+        return {"messages": messages, "tools": self.tools}
+
+    async def kickoff_async(self, messages: Any) -> dict[str, Any]:
+        # Mirror the sync API for async tests.
+        return {"messages": messages, "tools": self.tools}
+
+
+class _DummyTool(BaseTool):
+    name: str = "DummyTool"
+    description: str = "A dummy tool for testing"
+
+    def _run(self, *args: Any, **kwargs: Any) -> Any:
+        # Minimal implementation; we only care that the tool is instantiable.
+        return "dummy output"
+
+    name: str = "dummy"
+    description: str = "A dummy tool for testing"
+
+
+def _patch_lite_agent(monkeypatch, spy_cls=_SpyLiteAgent) -> None:
+    import crewai.agent.core as agent_core
+
+    monkeypatch.setattr(agent_core, "LiteAgent", spy_cls)
+
+
+def test_agent_kickoff_multimodal_adds_add_image_tool_once(monkeypatch) -> None:
+    _patch_lite_agent(monkeypatch)
+
+    agent = Agent(
+        role="Image agent",
+        goal="Handle image inputs",
+        backstory="Test agent for multimodal kickoff wiring.",
+        tools=[],  # start with no tools
+        multimodal=True,
+    )
+
+    result1 = agent.kickoff("Describe https://example.com/image.png")
+    tools1 = result1["tools"]
+
+    # We should get exactly one AddImageTool
+    add_image_tools_1 = [t for t in tools1 if isinstance(t, AddImageTool)]
+    assert add_image_tools_1, "Expected AddImageTool to be injected for multimodal agent kickoff"
+    assert len(add_image_tools_1) == 1, "Expected exactly one AddImageTool on first kickoff run"
+
+    # A second kickoff should not accumulate extra AddImageTool instances.
+    result2 = agent.kickoff("Describe https://example.com/another.png")
+    tools2 = result2["tools"]
+    add_image_tools_2 = [t for t in tools2 if isinstance(t, AddImageTool)]
+    assert len(add_image_tools_2) == 1, "Expected no duplicate AddImageTool instances across multiple kickoffs"
+
+
+def test_agent_kickoff_multimodal_does_not_duplicate_existing_add_image_tool(
+    monkeypatch,
+) -> None:
+    _patch_lite_agent(monkeypatch)
+
+    # Agent already has an AddImageTool in its tools list.
+    agent = Agent(
+        role="Image agent",
+        goal="Handle image inputs",
+        backstory="Test agent with existing AddImageTool.",
+        tools=[AddImageTool()],
+        multimodal=True,
+    )
+
+    result = agent.kickoff("Describe https://example.com/existing.png")
+    tools = result["tools"]
+    add_image_tools = [t for t in tools if isinstance(t, AddImageTool)]
+    assert (
+        len(add_image_tools) == 1
+    ), "Agent.kickoff should not duplicate an existing AddImageTool"
+
+
+def test_agent_kickoff_does_not_mutate_agent_tools(monkeypatch) -> None:
+    _patch_lite_agent(monkeypatch)
+
+    # Start with a single dummy tool in the agent's tools list.
+    dummy = _DummyTool()
+    agent = Agent(
+        role="Image agent",
+        goal="Handle image inputs",
+        backstory="Agent with existing non-image tool.",
+        tools=[dummy],
+        multimodal=True,
+    )
+
+    original_tools_id = id(agent.tools)
+    original_len = len(agent.tools)
+
+    result = agent.kickoff("Describe https://example.com/with-dummy.png")
+    tools = result["tools"]
+
+    # LiteAgent should see both the original tool and AddImageTool.
+    assert any(isinstance(t, _DummyTool) for t in tools), "Expected DummyTool to be preserved for kickoff"
+    assert any(isinstance(t, AddImageTool) for t in tools), "Expected AddImageTool to be injected for kickoff"
+
+    # Agent.tools should not be mutated in-place or grow with extra tools.
+    assert len(agent.tools) == original_len, "Agent.tools length should not change during kickoff"
+    assert id(agent.tools) == original_tools_id, "Agent.tools reference should not be replaced during kickoff"
+
+
+@pytest.mark.asyncio
+async def test_agent_kickoff_async_multimodal_adds_add_image_tool_once(monkeypatch) -> None:
+    _patch_lite_agent(monkeypatch)
+
+    agent = Agent(
+        role="Image agent",
+        goal="Handle image inputs async",
+        backstory="Test agent for async multimodal kickoff wiring.",
+        tools=[],
+        multimodal=True,
+    )
+
+    result1 = await agent.kickoff_async("Describe https://example.com/image.png")
+    tools1 = result1["tools"]
+    add_image_tools_1 = [t for t in tools1 if isinstance(t, AddImageTool)]
+    assert add_image_tools_1, "Expected AddImageTool to be injected for multimodal agent kickoff_async"
+    assert len(add_image_tools_1) == 1, "Expected exactly one AddImageTool on first kickoff_async run"
+
+    result2 = await agent.kickoff_async("Describe https://example.com/another.png")
+    tools2 = result2["tools"]
+    add_image_tools_2 = [t for t in tools2 if isinstance(t, AddImageTool)]
+    assert len(add_image_tools_2) == 1, "Expected no duplicate AddImageTool instances across multiple kickoff_async runs"
+
+
+@pytest.mark.asyncio
+async def test_agent_kickoff_async_multimodal_does_not_duplicate_existing_add_image_tool(
+    monkeypatch,
+) -> None:
+    _patch_lite_agent(monkeypatch)
+
+    agent = Agent(
+        role="Image agent",
+        goal="Handle image inputs async",
+        backstory="Async agent with existing AddImageTool.",
+        tools=[AddImageTool()],
+        multimodal=True,
+    )
+
+    result = await agent.kickoff_async("Describe https://example.com/existing.png")
+    tools = result["tools"]
+    add_image_tools = [t for t in tools if isinstance(t, AddImageTool)]
+    assert (
+        len(add_image_tools) == 1
+    ), "Agent.kickoff_async should not duplicate an existing AddImageTool"