stanfordnlp · Olocool17 · Dec 11, 2025 · Dec 11, 2025 · chenmoneygithub · Dec 11, 2025
diff --git a/dspy/clients/cache.py b/dspy/clients/cache.py
@@ -122,7 +122,6 @@ def get(self, request: dict[str, Any], ignored_args_for_cache_key: list[str] | N
         if hasattr(response, "usage"):
             # Clear the usage data when cache is hit, because no LM call is made
             response.usage = {}
-            response.cache_hit = True
         return response
 
     def put(

diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
@@ -6,6 +6,7 @@
 from typing import Any, Literal, cast
 
 import litellm
+import pydantic
 from anyio.streams.memory import MemoryObjectSendStream
 from asyncer import syncify
 
@@ -163,7 +164,7 @@ def forward(
 
         self._check_truncation(results)
 
-        if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
+        if dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results
 
@@ -201,7 +202,7 @@ async def aforward(
 
         self._check_truncation(results)
 
-        if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
+        if dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results
 
@@ -501,6 +502,12 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
     # Convert `response_format` to `text.format` for Responses API
     if "response_format" in request:
         response_format = request.pop("response_format")
+        if isinstance(response_format, type) and issubclass(response_format, pydantic.BaseModel):
+            response_format = {
+                "name": response_format.__name__,
+                "type": "json_schema",
+                "schema": response_format.model_json_schema(),
+            }
         text = request.pop("text", {})
         request["text"] = {**text, "format": response_format}