# OpenAICompatibleBackend

Backend adapter for OpenAI-compatible APIs.

Quick Example

from mamba_agents.backends import OpenAICompatibleBackend

# Direct instantiation
backend = OpenAICompatibleBackend(
    model="my-model",
    base_url="http://localhost:8000/v1",
    api_key="optional-key",
)

# Or use factory functions
from mamba_agents.backends import (
    create_ollama_backend,
    create_vllm_backend,
    create_lmstudio_backend,
)

# Ollama
backend = create_ollama_backend("llama3.2")

# vLLM
backend = create_vllm_backend("meta-llama/Llama-3.2-3B-Instruct")

# LM Studio
backend = create_lmstudio_backend()

Factory Functions

| Function                  | Default URL       | Description |
| ------------------------- | ----------------- | ----------- |
| `create_ollama_backend`   | `localhost:11434` | Ollama      |
| `create_vllm_backend`     | `localhost:8000`  | vLLM        |
| `create_lmstudio_backend` | `localhost:1234`  | LM Studio   |

API Reference

OpenAICompatibleBackend

OpenAICompatibleBackend(
    model: str,
    *,
    base_url: str = "https://api.openai.com/v1",
    api_key: SecretStr | str | None = None,
    timeout: float = 60.0,
    profile: ModelProfile | None = None,
)

Bases: ModelBackend

Backend for OpenAI-compatible APIs.

Works with any API that follows the OpenAI chat completions format. Automatically handles differences between providers.

Initialize the backend.

PARAMETER DESCRIPTION
model

Model identifier.

TYPE: str

base_url

API base URL.

TYPE: str DEFAULT: 'https://api.openai.com/v1'

api_key

API key for authentication.

TYPE: SecretStr | str | None DEFAULT: None

timeout

Request timeout in seconds.

TYPE: float DEFAULT: 60.0

profile

Custom model profile.

TYPE: ModelProfile | None DEFAULT: None

Source code in src/mamba_agents/backends/openai_compat.py
def __init__(
    self,
    model: str,
    *,
    base_url: str = "https://api.openai.com/v1",
    api_key: SecretStr | str | None = None,
    timeout: float = 60.0,
    profile: ModelProfile | None = None,
) -> None:
    """Set up the backend and its underlying async HTTP client.

    Args:
        model: Model identifier.
        base_url: API base URL; any trailing slash is stripped.
        api_key: API key for authentication (SecretStr or plain string).
        timeout: Request timeout in seconds.
        profile: Custom model profile; resolved from the model name when omitted.
    """
    self._model = model
    self._base_url = base_url.rstrip("/")
    self._timeout = timeout
    self._profile = profile or get_profile(model)

    # Unwrap pydantic SecretStr values; anything else is coerced to str.
    if api_key is None:
        self._api_key = None
    elif hasattr(api_key, "get_secret_value"):
        self._api_key = api_key.get_secret_value()
    else:
        self._api_key = str(api_key)

    self._client = httpx.AsyncClient(
        base_url=self._base_url,
        timeout=timeout,
        headers=self._build_headers(),
    )

name property

name: str

Get backend name.

model property

model: str

Get model identifier.

profile property

profile: ModelProfile

Get model profile.

complete async

complete(
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> ModelResponse

Generate a completion.

PARAMETER DESCRIPTION
messages

Conversation messages.

TYPE: list[dict[str, Any]]

tools

Available tools.

TYPE: list[dict[str, Any]] | None DEFAULT: None

temperature

Sampling temperature.

TYPE: float | None DEFAULT: None

max_tokens

Maximum tokens to generate.

TYPE: int | None DEFAULT: None

**kwargs

Additional options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
ModelResponse

ModelResponse with generation results.

RAISES DESCRIPTION
ModelBackendError

On API error.

RateLimitError

On rate limit.

AuthenticationError

On auth failure.

Source code in src/mamba_agents/backends/openai_compat.py
async def complete(
    self,
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> ModelResponse:
    """Generate a (non-streaming) completion.

    Args:
        messages: Conversation messages.
        tools: Available tools.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.
        **kwargs: Additional options.

    Returns:
        ModelResponse with generation results.

    Raises:
        ModelBackendError: On API error.
        RateLimitError: On rate limit.
        AuthenticationError: On auth failure.
    """
    request_body = self._build_payload(
        messages=messages,
        tools=tools,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False,
        **kwargs,
    )

    try:
        resp = await self._client.post("/chat/completions", json=request_body)
        self._check_response(resp)
        return self._parse_response(resp.json())

    except httpx.HTTPStatusError as exc:
        # _handle_http_error maps status codes onto the backend's
        # exception hierarchy and always raises.
        self._handle_http_error(exc)
        raise  # Never reached, but satisfies type checker

    except httpx.RequestError as exc:
        # Transport-level failures (DNS, connect, read) are retryable.
        raise ModelBackendError(
            f"Request failed: {exc}",
            model=self._model,
            retryable=True,
            cause=exc,
        ) from exc

stream async

stream(
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> AsyncIterator[StreamChunk]

Generate a streaming completion.

PARAMETER DESCRIPTION
messages

Conversation messages.

TYPE: list[dict[str, Any]]

tools

Available tools.

TYPE: list[dict[str, Any]] | None DEFAULT: None

temperature

Sampling temperature.

TYPE: float | None DEFAULT: None

max_tokens

Maximum tokens to generate.

TYPE: int | None DEFAULT: None

**kwargs

Additional options.

TYPE: Any DEFAULT: {}

YIELDS DESCRIPTION
AsyncIterator[StreamChunk]

StreamChunk objects with partial content.

Source code in src/mamba_agents/backends/openai_compat.py
async def stream(
    self,
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> AsyncIterator[StreamChunk]:
    """Generate a streaming completion via server-sent events.

    Args:
        messages: Conversation messages.
        tools: Available tools.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.
        **kwargs: Additional options.

    Yields:
        StreamChunk objects with partial content; a chunk with
        ``is_final=True`` is yielded when the server sends ``[DONE]``.

    Raises:
        ModelBackendError: On API error or transport failure.
        RateLimitError: On rate limit.
        AuthenticationError: On auth failure.
    """
    payload = self._build_payload(
        messages=messages,
        tools=tools,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        **kwargs,
    )

    try:
        async with self._client.stream("POST", "/chat/completions", json=payload) as response:
            self._check_response(response)

            async for line in response.aiter_lines():
                # Skip SSE keep-alives (blank lines and ":" comment lines).
                if not line or line.startswith(":"):
                    continue

                if line.startswith("data: "):
                    data_str = line[6:]
                    # "[DONE]" is the OpenAI-format end-of-stream sentinel.
                    if data_str.strip() == "[DONE]":
                        yield StreamChunk(is_final=True)
                        break

                    try:
                        data = json.loads(data_str)
                        chunk = self._parse_stream_chunk(data)
                        if chunk:
                            yield chunk
                    except json.JSONDecodeError:
                        # Tolerate malformed chunks rather than aborting the stream.
                        logger.warning("Failed to parse stream chunk: %s", line)

    except httpx.HTTPStatusError as e:
        self._handle_http_error(e)
        # Consistency/safety fix: complete() re-raises after the handler;
        # without this, a non-raising handler would silently end the
        # generator as if the stream had finished cleanly.
        raise  # Never reached, but satisfies type checker

    except httpx.RequestError as e:
        raise ModelBackendError(
            f"Stream request failed: {e}",
            model=self._model,
            retryable=True,
            cause=e,
        ) from e

health_check async

health_check() -> bool

Check if the backend is healthy.

RETURNS DESCRIPTION
bool

True if reachable.

Source code in src/mamba_agents/backends/openai_compat.py
async def health_check(self) -> bool:
    """Probe the server's model-listing endpoint to see if it is reachable.

    Returns:
        True if reachable.
    """
    try:
        # /models is the common OpenAI-compatible discovery endpoint.
        resp = await self._client.get("/models")
    except httpx.RequestError:
        # Transport failure: server is down or unreachable.
        return False
    # An auth error (401/403) still proves the server itself is up.
    return resp.status_code in (200, 401, 403)

close async

close() -> None

Close the HTTP client.

Source code in src/mamba_agents/backends/openai_compat.py
async def close(self) -> None:
    """Release the underlying httpx client and its connection pool."""
    client = self._client
    await client.aclose()

create_ollama_backend

create_ollama_backend(
    model: str,
    *,
    base_url: str = "http://localhost:11434/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for Ollama.

PARAMETER DESCRIPTION
model

Model name (e.g., "llama3.2", "mistral").

TYPE: str

base_url

Ollama API URL.

TYPE: str DEFAULT: 'http://localhost:11434/v1'

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_ollama_backend(
    model: str,
    *,
    base_url: str = "http://localhost:11434/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a local Ollama server.

    Args:
        model: Model name (e.g., "llama3.2", "mistral").
        base_url: Ollama API URL.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    # Ollama does not require authentication by default, so no key is set.
    return OpenAICompatibleBackend(model=model, base_url=base_url, api_key=None, **kwargs)

create_vllm_backend

create_vllm_backend(
    model: str,
    *,
    base_url: str = "http://localhost:8000/v1",
    api_key: str | None = None,
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for vLLM.

PARAMETER DESCRIPTION
model

Model name.

TYPE: str

base_url

vLLM API URL.

TYPE: str DEFAULT: 'http://localhost:8000/v1'

api_key

API key if required.

TYPE: str | None DEFAULT: None

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_vllm_backend(
    model: str,
    *,
    base_url: str = "http://localhost:8000/v1",
    api_key: str | None = None,
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a vLLM server.

    Args:
        model: Model name.
        base_url: vLLM API URL.
        api_key: API key if required.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    return OpenAICompatibleBackend(
        model=model,
        base_url=base_url,
        api_key=api_key,
        **kwargs,
    )

create_lmstudio_backend

create_lmstudio_backend(
    model: str = "local-model",
    *,
    base_url: str = "http://localhost:1234/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for LM Studio.

PARAMETER DESCRIPTION
model

Model identifier (can be any name).

TYPE: str DEFAULT: 'local-model'

base_url

LM Studio API URL.

TYPE: str DEFAULT: 'http://localhost:1234/v1'

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_lmstudio_backend(
    model: str = "local-model",
    *,
    base_url: str = "http://localhost:1234/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a local LM Studio server.

    Args:
        model: Model identifier (can be any name).
        base_url: LM Studio API URL.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    # LM Studio's local server ignores the model name and needs no auth.
    return OpenAICompatibleBackend(model=model, base_url=base_url, api_key=None, **kwargs)