Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.vs/
.claude/
.idea/
.worktrees/
#Emacs backup
*~
# Byte-compiled / optimized / DLL files
Expand Down
28 changes: 24 additions & 4 deletions libs/core/langchain_core/messages/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2183,6 +2183,12 @@ def _convert_to_openai_tool_calls(tool_calls: list[ToolCall]) -> list[dict]:
]


# Per-tool character offsets, keyed by wire-format name. The offset is added on
# top of a tool's format-neutral schema size when approximating token usage.
_TOOL_FORMAT_OFFSETS: dict[str, int] = {
    # OpenAI wraps each schema in a {"type":"function","function":{...}} envelope.
    "openai": 32,
    # Anthropic's flat name/description/input_schema layout is roughly the
    # neutral form already, so nothing extra is added.
    "anthropic": 0,
}


def count_tokens_approximately(
messages: Iterable[MessageLikeRepresentation],
*,
Expand All @@ -2192,6 +2198,7 @@ def count_tokens_approximately(
tokens_per_image: int = 85,
use_usage_metadata_scaling: bool = False,
tools: list[BaseTool | dict[str, Any]] | None = None,
tool_format: str = "openai",
) -> int:
"""Approximate the total number of tokens in messages.

Expand Down Expand Up @@ -2223,7 +2230,11 @@ def count_tokens_approximately(
`AI_total_tokens / approx_tokens_up_to_that_AI_message`
tools: List of tools to include in the token count. Each tool can be either
a `BaseTool` instance or a dict representing a tool schema. `BaseTool`
instances are converted to OpenAI tool format before counting.
instances use a cached neutral payload size plus a per-format offset.
tool_format: Wire format used when estimating tool token costs for `BaseTool`
instances. Supported values: `"openai"` (default, adds ~32 chars of
envelope overhead) and `"anthropic"` (no overhead). Ignored for dict
tools, which are measured as-is.

Returns:
Approximate number of tokens in the messages (and tools, if provided).
Expand All @@ -2249,10 +2260,19 @@ def count_tokens_approximately(

# Count tokens for tools if provided
if tools:
from langchain_core.tools import BaseTool as _BaseTool # noqa: PLC0415

offset = _TOOL_FORMAT_OFFSETS.get(tool_format, 0)
tools_chars = 0
for tool in tools:
tool_dict = tool if isinstance(tool, dict) else convert_to_openai_tool(tool)
tools_chars += len(json.dumps(tool_dict))
for tool_item in tools:
if isinstance(tool_item, dict):
tools_chars += len(json.dumps(tool_item, default=str))
elif isinstance(tool_item, _BaseTool):
tools_chars += tool_item._approximate_schema_chars + offset # noqa: SLF001
else:
tools_chars += len(
json.dumps(convert_to_openai_tool(tool_item), default=str)
)
token_count += math.ceil(tools_chars / chars_per_token)

for message in converted_messages:
Expand Down
2 changes: 2 additions & 0 deletions libs/core/langchain_core/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"StructuredTool",
"Tool",
"ToolException",
"ToolSchema",
"ToolsRenderer",
"_get_runnable_config_param",
"convert_runnable_to_tool",
Expand All @@ -70,6 +71,7 @@
"InjectedToolCallId": "base",
"SchemaAnnotationError": "base",
"ToolException": "base",
"ToolSchema": "schema",
"_get_runnable_config_param": "base",
"create_schema_from_function": "base",
"convert_runnable_to_tool": "convert",
Expand Down
Loading
Loading