TokenCounter¶

Token counting using tiktoken.

Quick Example¶

from mamba_agents.tokens import TokenCounter
from mamba_agents.tokens.config import TokenizerConfig

# TokenCounter accepts a TokenizerConfig (not an `encoding` keyword); the
# encoding is read from the config's `encoding` attribute.
# NOTE(review): assumes TokenizerConfig accepts `encoding` as a keyword
# argument -- confirm against its definition.
counter = TokenCounter(config=TokenizerConfig(encoding="cl100k_base"))

# Count text tokens
text_tokens = counter.count("Hello, world!")

# Count message tokens
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi!"},
]
message_tokens = counter.count_messages(messages)

API Reference¶

TokenCounter ¶

TokenCounter(config: TokenizerConfig | None = None)

Token counting using tiktoken.

Provides methods for counting tokens in text and message lists. Token counts are approximate and may vary from actual model tokenization.

Initialize the token counter.

PARAMETER	DESCRIPTION
`config`	Optional tokenizer configuration. TYPE: `TokenizerConfig \| None` DEFAULT: `None`

Source code in src/mamba_agents/tokens/counter.py

def __init__(self, config: TokenizerConfig | None = None) -> None:
    """Set up the counter from a tokenizer configuration.

    Stores the configuration, remembers the encoding name it specifies and
    loads the matching tiktoken encoding.

    Args:
        config: Tokenizer configuration to use. When omitted (or falsy), a
            default ``TokenizerConfig()`` is created.
    """
    # Function-scope import, kept as in the original (presumably to avoid a
    # circular import at module load time -- confirm).
    from mamba_agents.tokens.config import TokenizerConfig

    settings = config or TokenizerConfig()
    self._config = settings
    self._encoding_name = settings.encoding

    # `cache_tokenizer` selects the module-level `_get_encoding` helper
    # (defined outside this view) over a direct tiktoken lookup.
    loader = _get_encoding if settings.cache_tokenizer else tiktoken.get_encoding
    self._encoding = loader(self._encoding_name)

count ¶

count(text: str) -> int

Count tokens in text.

PARAMETER	DESCRIPTION
`text`	The text to count tokens in. TYPE: `str`

RETURNS	DESCRIPTION
`int`	Approximate token count.

Source code in src/mamba_agents/tokens/counter.py

def count(self, text: str) -> int:
    """Count tokens in text.

    Special-token markers that happen to appear in ``text`` (for example
    ``<|endoftext|>``) are tokenized as ordinary text rather than rejected,
    so arbitrary user-supplied text can always be counted.

    Args:
        text: The text to count tokens in.

    Returns:
        Approximate token count (0 for an empty string).
    """
    # tiktoken's encode() raises ValueError by default when the input
    # contains special-token text; an empty `disallowed_special` turns that
    # check off and encodes such markers as plain text.
    return len(self._encoding.encode(text, disallowed_special=()))

count_messages ¶

count_messages(messages: list[dict[str, Any]]) -> int

Count tokens in a message list.

Estimates tokens for a list of chat messages, accounting for message structure overhead.

PARAMETER	DESCRIPTION
`messages`	List of message dictionaries with 'role' and 'content'. TYPE: `list[dict[str, Any]]`

RETURNS	DESCRIPTION
`int`	Approximate total token count.

Source code in src/mamba_agents/tokens/counter.py

def count_messages(self, messages: list[dict[str, Any]]) -> int:
    """Count tokens in a message list.

    Estimates tokens for a list of chat messages, accounting for
    message structure overhead: 4 tokens per message, 10 per tool call
    and 3 for the list as a whole, on top of the counted text.

    Args:
        messages: List of message dictionaries with 'role' and 'content'.
            'content' may be a string, ``None``, or a list of content
            parts (only string parts and the 'text' of dict parts are
            counted). 'tool_calls' may be missing or ``None``.

    Returns:
        Approximate total token count.
    """
    import json

    total = 0

    for message in messages:
        # Add overhead per message (role, separators)
        total += 4  # Approximate overhead per message

        # Count content tokens
        content = message.get("content", "")
        if isinstance(content, str):
            if content:
                total += self.count(content)
        elif isinstance(content, list):
            # Multi-part content: count the textual parts, skip the rest
            # (e.g. image parts carry no text to tokenize).
            for part in content:
                if isinstance(part, str):
                    total += self.count(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    total += self.count(part["text"])
        elif content:
            # Any other truthy payload: approximate via its string form.
            total += self.count(str(content))

        # Count role tokens
        role = message.get("role", "")
        if role:
            total += self.count(role)

        # Count tool call tokens if present. `or []` also covers an explicit
        # `"tool_calls": None`, which `.get(key, [])` would pass through.
        tool_calls = message.get("tool_calls") or []
        for tool_call in tool_calls:
            if isinstance(tool_call, dict):
                func = tool_call.get("function") or {}
                name = func.get("name") or ""
                args = func.get("arguments") or ""
                if not isinstance(args, str):
                    # Already-parsed arguments: count their JSON text.
                    # default=str keeps this best-effort for odd values.
                    args = json.dumps(args, default=str)
                total += self.count(name) + self.count(args) + 10  # Overhead

    # Add final overhead
    total += 3

    return total

count_with_margin ¶

count_with_margin(text: str) -> int

Count tokens with safety margin.

PARAMETER	DESCRIPTION
`text`	The text to count. TYPE: `str`

RETURNS	DESCRIPTION
`int`	Token count plus a proportional safety margin (`int(count * safety_margin)`, using the configured `safety_margin`).

Source code in src/mamba_agents/tokens/counter.py

def count_with_margin(self, text: str) -> int:
    """Return the token count of ``text`` padded by the safety margin.

    The padding is the configured ``safety_margin`` fraction of the raw
    count, truncated to a whole number of tokens.

    Args:
        text: The text to count.

    Returns:
        Token count plus safety margin.
    """
    tokens = self.count(text)
    # int() truncates, so the padding never rounds up.
    return tokens + int(tokens * self._config.safety_margin)

fits_context ¶

fits_context(text: str, max_tokens: int) -> bool

Check if text fits within a context window.

PARAMETER	DESCRIPTION
`text`	The text to check. TYPE: `str`
`max_tokens`	Maximum token count. TYPE: `int`

RETURNS	DESCRIPTION
`bool`	True if the token count of `text`, including the safety margin, does not exceed `max_tokens`.

Source code in src/mamba_agents/tokens/counter.py

def fits_context(self, text: str, max_tokens: int) -> bool:
    """Tell whether ``text`` fits in a context window of ``max_tokens``.

    The comparison uses the margin-padded count from
    ``count_with_margin``, not the raw token count.

    Args:
        text: The text to check.
        max_tokens: Maximum token count.

    Returns:
        True if the padded count is at most ``max_tokens``.
    """
    padded_count = self.count_with_margin(text)
    return padded_count <= max_tokens

get_encoding_for_model ¶

get_encoding_for_model(model: str) -> str

Get the appropriate encoding for a model.

PARAMETER	DESCRIPTION
`model`	Model name or identifier. TYPE: `str`

RETURNS	DESCRIPTION
`str`	Encoding name to use.

Source code in src/mamba_agents/tokens/counter.py

def get_encoding_for_model(self, model: str) -> str:
    """Get the appropriate encoding for a model.

    Each key of the configured ``model_mapping`` is matched
    case-insensitively as a substring of ``model``. When several keys
    match, the longest (most specific) key wins, so a mapping containing
    both ``gpt-4`` and ``gpt-4o`` resolves ``gpt-4o-mini`` via ``gpt-4o``
    regardless of dictionary order. Keys of equal length keep mapping order.

    Args:
        model: Model name or identifier.

    Returns:
        Encoding name to use; the configured default encoding when no
        mapping key matches.
    """
    model_lower = model.lower()  # loop-invariant, so lower-case once

    best_len = -1
    best_encoding = None
    for prefix, encoding in self._config.model_mapping.items():
        key = prefix.lower()
        # Strictly longer only: on ties the earlier mapping entry is kept.
        if len(key) > best_len and key in model_lower:
            best_len = len(key)
            best_encoding = encoding

    if best_len >= 0:
        return best_encoding

    # Default encoding
    return self._config.encoding