Skip to content

TokenCounter

Token counting using tiktoken.

Quick Example

from mamba_agents.tokens import TokenCounter
from mamba_agents.tokens.config import TokenizerConfig

# TokenCounter accepts a single optional TokenizerConfig; it has no
# `encoding` keyword of its own, so the encoding is chosen on the config.
# NOTE(review): assumes TokenizerConfig accepts `encoding` as a keyword --
# confirm against the TokenizerConfig reference.
counter = TokenCounter(TokenizerConfig(encoding="cl100k_base"))

# Count text tokens
text_tokens = counter.count("Hello, world!")

# Count message tokens
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi!"},
]
message_tokens = counter.count_messages(messages)

API Reference

TokenCounter

TokenCounter(config: TokenizerConfig | None = None)

Token counting using tiktoken.

Provides methods for counting tokens in text and message lists. Token counts are approximate and may vary from actual model tokenization.

Initialize the token counter.

PARAMETER DESCRIPTION
config

Optional tokenizer configuration.

TYPE: TokenizerConfig | None DEFAULT: None

Source code in src/mamba_agents/tokens/counter.py
def __init__(self, config: TokenizerConfig | None = None) -> None:
    """Set up the counter from a tokenizer configuration.

    Stores the configuration, remembers the configured encoding name, and
    loads the matching encoding object.

    Args:
        config: Tokenizer configuration to use. When omitted, a
            ``TokenizerConfig()`` built with no arguments is used.
    """
    # Imported here rather than at module level, as in the original.
    from mamba_agents.tokens.config import TokenizerConfig

    settings = config or TokenizerConfig()
    self._config = settings
    self._encoding_name = settings.encoding

    # With `cache_tokenizer` set, the encoding comes from this module's
    # `_get_encoding` helper (defined outside this block; presumably a
    # cached lookup -- confirm); otherwise it is fetched from tiktoken.
    load_encoding = (
        _get_encoding if settings.cache_tokenizer else tiktoken.get_encoding
    )
    self._encoding = load_encoding(self._encoding_name)

count

count(text: str) -> int

Count tokens in text.

PARAMETER DESCRIPTION
text

The text to count tokens in.

TYPE: str

RETURNS DESCRIPTION
int

Approximate token count.

Source code in src/mamba_agents/tokens/counter.py
def count(self, text: str) -> int:
    """Count tokens in text.

    The input is always tokenized as ordinary text. Strings that happen to
    spell a special token (for example ``<|endoftext|>``) are counted like
    any other characters instead of causing the encoder to raise.

    Args:
        text: The text to count tokens in.

    Returns:
        Approximate token count. Empty text yields 0.
    """
    if not text:
        return 0

    # tiktoken's default is disallowed_special="all", which raises
    # ValueError as soon as the text contains a special-token string.
    # A counter must accept arbitrary text, so disable that check.
    return len(self._encoding.encode(text, disallowed_special=()))

count_messages

count_messages(messages: list[dict[str, Any]]) -> int

Count tokens in a message list.

Estimates tokens for a list of chat messages, accounting for message structure overhead.

PARAMETER DESCRIPTION
messages

List of message dictionaries with 'role' and 'content'.

TYPE: list[dict[str, Any]]

RETURNS DESCRIPTION
int

Approximate total token count.

Source code in src/mamba_agents/tokens/counter.py
def count_messages(self, messages: list[dict[str, Any]]) -> int:
    """Count tokens in a message list.

    Estimates tokens for a list of chat messages, accounting for
    message structure overhead. Missing keys and keys explicitly set to
    ``None`` (``content``, ``role``, ``tool_calls``, a tool call's
    ``function``, ``name`` or ``arguments``) are treated as empty rather
    than raising.

    Args:
        messages: List of message dictionaries with 'role' and 'content',
            and optionally 'tool_calls'.

    Returns:
        Approximate total token count. An empty list yields only the
        fixed final overhead.
    """
    per_message_overhead = 4  # Approximate overhead per message (role, separators)
    per_tool_call_overhead = 10  # Approximate overhead per tool call
    final_overhead = 3  # Added once for the whole list

    total = 0

    for message in messages:
        total += per_message_overhead

        # Count content tokens
        content = message.get("content")
        if content:
            total += self.count(content)

        # Count role tokens
        role = message.get("role")
        if role:
            total += self.count(role)

        # Count tool call tokens if present. A message may carry
        # "tool_calls": None (key present, no calls); `.get(key, [])` would
        # hand that None to the loop, so fall back with `or` instead.
        for tool_call in message.get("tool_calls") or ():
            if not isinstance(tool_call, dict):
                continue
            func = tool_call.get("function") or {}
            name = func.get("name") or ""
            args = func.get("arguments") or ""
            total += self.count(name) + self.count(args) + per_tool_call_overhead

    return total + final_overhead

count_with_margin

count_with_margin(text: str) -> int

Count tokens with safety margin.

PARAMETER DESCRIPTION
text

The text to count.

TYPE: str

RETURNS DESCRIPTION
int

Token count plus safety margin.

Source code in src/mamba_agents/tokens/counter.py
def count_with_margin(self, text: str) -> int:
    """Return the token count of ``text`` padded by the safety margin.

    The padding is the plain count multiplied by the configured
    ``safety_margin`` and truncated to an integer.

    Args:
        text: The text to count.

    Returns:
        Token count plus safety margin.
    """
    tokens = self.count(text)
    return tokens + int(tokens * self._config.safety_margin)

fits_context

fits_context(text: str, max_tokens: int) -> bool

Check if text fits within a context window.

PARAMETER DESCRIPTION
text

The text to check.

TYPE: str

max_tokens

Maximum token count.

TYPE: int

RETURNS DESCRIPTION
bool

True if text fits (with safety margin).

Source code in src/mamba_agents/tokens/counter.py
def fits_context(self, text: str, max_tokens: int) -> bool:
    """Tell whether ``text`` fits in a context window of ``max_tokens``.

    The comparison uses the margin-padded count from
    ``count_with_margin``, not the raw token count.

    Args:
        text: The text to check.
        max_tokens: Maximum token count.

    Returns:
        True if text fits (with safety margin).
    """
    padded_tokens = self.count_with_margin(text)
    return padded_tokens <= max_tokens

get_encoding_for_model

get_encoding_for_model(model: str) -> str

Get the appropriate encoding for a model.

PARAMETER DESCRIPTION
model

Model name or identifier.

TYPE: str

RETURNS DESCRIPTION
str

Encoding name to use.

Source code in src/mamba_agents/tokens/counter.py
def get_encoding_for_model(self, model: str) -> str:
    """Get the appropriate encoding for a model.

    Each key of the configured ``model_mapping`` is matched against the
    model name as a case-insensitive substring. When several keys match,
    the longest (most specific) key wins, so a general key such as
    ``"gpt-4"`` cannot shadow a more specific one such as ``"gpt-4o"``
    merely because it appears earlier in the mapping. Keys of equal length
    keep mapping order.

    Args:
        model: Model name or identifier.

    Returns:
        Encoding name to use: the mapped encoding of the best-matching
        key, or the configured default encoding when no key matches.
    """
    model_name = model.lower()  # Lowercase once instead of per key.

    # Check model mapping
    matches = [
        (key, encoding)
        for key, encoding in self._config.model_mapping.items()
        if key.lower() in model_name
    ]
    if matches:
        # max() returns the first maximal item, so ties keep mapping order.
        _, best_encoding = max(matches, key=lambda match: len(match[0]))
        return best_encoding

    # Default encoding
    return self._config.encoding