update examples

pchalasani · pchalasani · commit 2cac4bbc326f · 2024-11-01T10:16:46.000-04:00
diff --git a/examples/basic/1d-screen-click.py b/examples/basic/1d-screen-click.py
@@ -0,0 +1,206 @@
+"""
+
+A Bit-Shooter Game played on a 1-dimensional binary screen.
+
+Give an LLM Agent access to a 1-dimensional "screen" represented
+as a string of bits (0s and 1s), e.g. "101010",
+and equip it with a "Click tool" (like a mouse click) that allows it to
+click on a bit -- clicking the bit causes it to flip.
+
+The Agent plays a "Bit Shooter" game where the goal is to get rid of all
+1s in the "screen".
+
+To use the Click tool, the Agent must specify the position (zero-based)
+where it wants to click. This causes the bit to flip.
+The LLM is then presented with the new state of the screen,
+and the process repeats until all 1s are gone.
+
+Clearly the Agent (LLM) needs to be able to accurately count the bit positions,
+to be able to correctly click on the 1s.
+
+Run like this (--model is optional, defaults to GPT4o):
+
+python3 examples/basic/1d-screen-click.py --model litellm/anthropic/claude-3-5-sonnet-20241022
+
+At the beginning you get to specify the initial state of the screen:
+- size of the screen (how many bits)
+- the (0-based) locations of the 1s (SPACE-separated) in the screen.
+
+E.g. try this:
+- size = 50,
+- 1-indices: 0 20 30 40
+
+The loop is set to run in interactive mode (to prevent runaway loops),
+so you have to keep hitting enter to see the LLM's next move.
+
+The main observation is that when you run it with claude-3.5-sonnet,
+the accuracy of the Agent's clicks is far superior to other LLMs like GPT-4o
+and even GPT-4.
+
+To try with other LLMs, you can set the --model param to, for example:
+- gpt-4 (set OPENAI_API_KEY in your env or .env file)
+- gpt-4o (ditto, set OPENAI_API_KEY)
+- groq/llama-3.1-70b-versatile (set GROQ_API_KEY in your env or .env file)
+- cerebras/llama3.1-70b (set CEREBRAS_API_KEY in your env or .env file)
+- ollama/qwen2.5-coder:latest
+
+See here for a full guide on local/open LLM setup with Langroid:
+https://langroid.github.io/langroid/tutorials/local-llm-setup/
+And here for how to use with other non-OpenAI LLMs:
+https://langroid.github.io/langroid/tutorials/non-openai-llms/
+"""
+
+from typing import List, Tuple
+
+import langroid as lr
+import langroid.language_models as lm
+from langroid.agent.tools.orchestration import AgentDoneTool
+from langroid.pydantic_v1 import BaseModel
+from langroid.utils.globals import GlobalState
+from rich.prompt import Prompt
+import fire
+
+
+class ScreenState(BaseModel):
+    """
+    Represents the state of the 1-dimensional binary screen.
+
+    The single field `screen` holds the bits as a string of "0"/"1" chars;
+    position 0 is the leftmost character.
+    """
+
+    screen: str | None = None  # binary string, e.g. "101010"
+
+    def __init__(
+        self,
+        one_indices: List[int] | None = None,
+        size: int = 1,
+    ):
+        """
+        Build a screen of `size` bits with 1s at `one_indices` (0-based);
+        indices outside [0, size) are ignored. Default index list is [1].
+        """
+        super().__init__()
+        # `None` stands in for the old `[1]` default: same result, but no
+        # mutable default argument shared across calls.
+        ones = {1} if one_indices is None else set(one_indices)
+        self.screen = "".join("1" if i in ones else "0" for i in range(size))
+
+    @classmethod
+    def set_state(
+        cls,
+        one_indices: List[int],
+        size: int,
+    ) -> "ScreenState":
+        """
+        Factory method: create the initial state, store it in
+        GlobalScreenState, and return it (as the annotation promises).
+        """
+        initial_state = cls(one_indices=one_indices, size=size)
+        GlobalScreenState.set_values(state=initial_state)
+        return initial_state
+
+    def flip(self, i: int):
+        """
+        Flip the i-th bit (0-based); an out-of-range `i` is silently ignored.
+        """
+        if self.screen is None or i < 0 or i >= len(self.screen):
+            return
+
+        screen_list = list(self.screen)
+        screen_list[i] = "1" if screen_list[i] == "0" else "0"
+        self.screen = "".join(screen_list)
+
+
+class GlobalScreenState(GlobalState):  # shared holder for the current screen
+    state: ScreenState = ScreenState()  # placeholder until set_state() runs
+
+
+def get_state() -> ScreenState:  # read the ScreenState held in GlobalScreenState
+    return GlobalScreenState.get_value("state")
+
+
+class ClickTool(lr.ToolMessage):
+    # Tool through which the LLM "clicks" (flips) one bit of the shared screen.
+    request: str = "click_tool"
+    purpose: str = """
+        To click at <position> on the 1-dimensional binary screen, 
+        which causes the bit at that position to FLIP.
+        IMPORTANT: the position numbering starts from 0!!!
+    """
+
+    position: int
+
+    @classmethod
+    def examples(cls) -> List[lr.ToolMessage | Tuple[str, lr.ToolMessage]]:
+        """Sample usages: a bare tool call, then a (thought, tool call) pair."""
+        bare_call = cls(position=3)
+        with_thought = ("I want to click at position 5", cls(position=5))
+        return [bare_call, with_thought]
+
+    def handle(self) -> str | AgentDoneTool:
+        """Flip the clicked bit; return AgentDoneTool once no 1s remain."""
+        screen_state = get_state()
+        screen_state.flip(self.position)
+        print("SCREEN STATE = ", screen_state.screen)
+        if "1" in screen_state.screen:
+            # still 1s left: hand the updated screen back
+            return screen_state.screen
+        return AgentDoneTool()
+
+
+def main(model: str = ""):
+    """Play the Bit-Shooter game with `model` (GPT4o when left empty)."""
+    chat_model = model or lm.OpenAIChatModel.GPT4o
+    tool_name = ClickTool.default_value("request")
+
+    agent_config = lr.ChatAgentConfig(
+        name="Clicker",
+        llm=lm.OpenAIGPTConfig(chat_model=chat_model),
+        use_functions_api=False,  # suppress OpenAI functions/tools
+        use_tools=True,  # enable langroid-native tools: works with any LLM
+        show_stats=False,
+        system_message=f"""
+            You are an expert at COMPUTER USE.
+            In this task you only have to be able to understand a 1-dimensional 
+            screen presented to you as a string of bits (0s and 1s).
+            You will play a 1-dimensional BIT-shooter game!
+            
+            Your task is to CLICK ON THE LEFTMOST 1 in the bit-string, 
+            to flip it to a 0.
+            
+            Always try to click on the LEFTMOST 1 in the bit-sequence. 
+            
+            To CLICK on the screen you 
+            must use the TOOL `{tool_name}` where the  
+            `position` field specifies the position (zero-based) to click.
+            If you CORRECTLY click on a 1, the bit at that position will be 
+            turned to 0.
+            But if you click on a 0, it will turn into a 1, 
+            taking you further from your goal.
+            
+            So you MUST ACCURATELY specify the position of the LEFTMOST 1 to click,
+            making SURE there is a 1 at that position.
+            In other words, it is critical that you are able to ACCURATELY COUNT 
+            the bit positions so that you are able to correctly identify the position 
+            of the LEFTMOST 1 bit in the "screen" given to you as a string of bits.
+            """,
+    )
+
+    clicker = lr.ChatAgent(agent_config)
+    clicker.enable_message(ClickTool)
+
+    # interactive: the user steps through each turn, preventing runaway loops
+    game = lr.Task(clicker, interactive=True, only_user_quits_root=False)
+
+    # kick it off with initial screen state (set below by user)
+    game.run(get_state())
+
+
+if __name__ == "__main__":
+    size = int(Prompt.ask("Size of screen (how many bits)"))
+    # split() with no argument tolerates repeated spaces and an empty answer
+    ones = [int(x) for x in Prompt.ask("Indices of 1s (SPACE-separated)").split()]
+    ScreenState.set_state(ones, size)
+    print("SCREEN STATE = ", get_state().screen)
+    fire.Fire(main)
diff --git a/examples/basic/chat-persist.py b/examples/basic/chat-persist.py
@@ -1,5 +1,5 @@
 """
-Variant of chat.py, showing how you can save conversation state, end the script
+Variant of chat.py, showing how you can save conversation state, end the script, and
 resume the conversation later by re-running the script.
 
 The most basic chatbot example, using the default settings.
diff --git a/examples/basic/intent-classifier.py b/examples/basic/intent-classifier.py
@@ -0,0 +1,118 @@
+"""
+Agent-loop to classify the intent of a given text.
+
+Run like this (--model is optional, defaults to GPT4o):
+
+python3 examples/basic/intent-classifier.py --model groq/llama-3.1-8b-instant
+
+Other ways to specify the model:
+- gpt-4 (set OPENAI_API_KEY in your env or .env file)
+- gpt-4o (ditto, set OPENAI_API_KEY)
+- cerebras/llama3.1-70b (set CEREBRAS_API_KEY)
+
+For more ways to use langroid with other LLMs, see:
+- local/open LLMs: https://langroid.github.io/langroid/tutorials/local-llm-setup/
+- non-OpenAI LLMs: https://langroid.github.io/langroid/tutorials/non-openai-llms/
+"""
+
+from typing import List, Tuple
+
+import langroid as lr
+from langroid.agent.tools.orchestration import ResultTool
+import langroid.language_models as lm
+from enum import Enum
+from rich.prompt import Prompt
+from fire import Fire
+
+
+class Intent(str, Enum):  # the four labels a text can be classified into
+    GREETING = "greeting"
+    FAREWELL = "farewell"
+    QUESTION = "question"
+    STATEMENT = "statement"
+
+
+class IntentTool(lr.ToolMessage):
+    request: str = "intent_tool"
+    purpose: str = """
+        To classify the <intent> of a given text, into one of:
+        - greeting
+        - farewell
+        - question
+        - statement
+        """
+
+    intent: Intent
+
+    @classmethod
+    def examples(cls) -> List[lr.ToolMessage | Tuple[str, lr.ToolMessage]]:
+        """Use these as few-shot tool examples"""
+        return [
+            cls(intent=Intent.GREETING),
+            ("I want to classify this as a question", cls(intent=Intent.QUESTION)),
+        ]
+
+    def handle(self) -> ResultTool:
+        """Handle the tool sent by LLM; the returned ResultTool ends the task"""
+        return ResultTool(intent=self.intent)
+
+
+class IntentAgent(lr.ChatAgent):
+    # The no-tool fallback hook is an agent method, so it is overridden here
+    # rather than on IntentTool, where it would never be called.
+    def handle_message_fallback(self, msg: str | lr.ChatDocument) -> str | None:
+        """We end up here if the LLM did not send a Tool, so nudge it"""
+        if (
+            isinstance(msg, lr.ChatDocument)
+            and msg.metadata.sender == lr.Entity.LLM
+        ):
+            return """
+            You forgot to use the `intent_tool` to classify the intent.
+            """
+
+
+def main(model: str = ""):
+    """Classify user-entered texts in a loop using `model` (default GPT4o)."""
+    intent_tool_name = IntentTool.default_value("request")
+    agent = IntentAgent(
+        lr.ChatAgentConfig(
+            name="Intent",
+            llm=lm.OpenAIGPTConfig(chat_model=model or lm.OpenAIChatModel.GPT4o),
+            use_functions_api=False,  # suppress OpenAI functions/tools
+            use_tools=True,  # use langroid-native tools: works with ANY LLM
+            system_message=f"""
+            You are an astute INTENT CLASSIFIER: given any piece of text
+            from the user, you are able to smartly infer their intent.
+            Given such a piece of text, classify its intent into one of the following:
+             - greeting
+             - farewell
+             - question
+             - statement
+            To present your classification, use the 
+            `{intent_tool_name}` tool.
+            
+            ALWAYS use this tool to respond, do NOT say anything else.
+            """,
+        )
+    )
+    agent.enable_message(IntentTool)
+    # create a task loop specialized to return an Intent
+    task = lr.Task(agent=agent, interactive=False)[Intent]
+
+    while True:
+        text = Prompt.ask("Enter a text to classify its intent")
+        intent = task.run(
+            f"""
+         Please classify the intent of this text, present your answer
+         using the `{intent_tool_name}` tool:
+         ----
+         {text}
+         ----
+         """
+        )
+        print(f"Intent: {intent}")
+
+
+if __name__ == "__main__":
+    # Fire turns main's `model` parameter into the optional --model CLI flag
+    Fire(main)
diff --git a/examples/basic/xml_tool.py b/examples/basic/xml_tool.py