# OpenAICompatibleBackend

Backend adapter for OpenAI-compatible APIs.

Quick Example

from mamba_agents.backends import OpenAICompatibleBackend

# Direct instantiation
backend = OpenAICompatibleBackend(
    model="my-model",
    base_url="http://localhost:8000/v1",
    api_key="optional-key",
)

# Or use factory functions
from mamba_agents.backends import (
    create_ollama_backend,
    create_vllm_backend,
    create_lmstudio_backend,
)

# Ollama
backend = create_ollama_backend("llama3.2")

# vLLM
backend = create_vllm_backend("meta-llama/Llama-3.2-3B-Instruct")

# LM Studio
backend = create_lmstudio_backend()

Factory Functions

| Function                  | Default URL       | Description |
| ------------------------- | ----------------- | ----------- |
| `create_ollama_backend`   | `localhost:11434` | Ollama      |
| `create_vllm_backend`     | `localhost:8000`  | vLLM        |
| `create_lmstudio_backend` | `localhost:1234`  | LM Studio   |

API Reference

OpenAICompatibleBackend

OpenAICompatibleBackend(
    model: str,
    *,
    base_url: str = "https://api.openai.com/v1",
    api_key: SecretStr | str | None = None,
    timeout: float = 60.0,
    profile: ModelProfile | None = None,
)

Bases: ModelBackend

Backend for OpenAI-compatible APIs.

Works with any API that follows the OpenAI chat completions format. Automatically handles differences between providers.

Initialize the backend.

PARAMETER DESCRIPTION
model

Model identifier.

TYPE: str

base_url

API base URL.

TYPE: str DEFAULT: 'https://api.openai.com/v1'

api_key

API key for authentication.

TYPE: SecretStr | str | None DEFAULT: None

timeout

Request timeout in seconds.

TYPE: float DEFAULT: 60.0

profile

Custom model profile.

TYPE: ModelProfile | None DEFAULT: None

Source code in src/mamba_agents/backends/openai_compat.py
def __init__(
    self,
    model: str,
    *,
    base_url: str = "https://api.openai.com/v1",
    api_key: SecretStr | str | None = None,
    timeout: float = 60.0,
    profile: ModelProfile | None = None,
) -> None:
    """Set up the backend and its underlying async HTTP client.

    Args:
        model: Model identifier.
        base_url: API base URL; any trailing slash is stripped.
        api_key: API key for authentication (SecretStr or plain string).
        timeout: Request timeout in seconds.
        profile: Custom model profile; resolved from the model name when omitted.
    """
    self._model = model
    self._base_url = base_url.rstrip("/")
    self._timeout = timeout
    self._profile = profile or get_profile(model)

    # Unwrap pydantic SecretStr values; anything else is coerced to str.
    if api_key is None:
        self._api_key = None
    elif hasattr(api_key, "get_secret_value"):
        self._api_key = api_key.get_secret_value()
    else:
        self._api_key = str(api_key)

    self._client = httpx.AsyncClient(
        base_url=self._base_url,
        timeout=timeout,
        headers=self._build_headers(),
    )

name property

name: str

Get backend name.

model property

model: str

Get model identifier.

profile property

profile: ModelProfile

Get model profile.

complete async

complete(
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> ModelResponse

Generate a completion.

PARAMETER DESCRIPTION
messages

Conversation messages.

TYPE: list[dict[str, Any]]

tools

Available tools.

TYPE: list[dict[str, Any]] | None DEFAULT: None

temperature

Sampling temperature.

TYPE: float | None DEFAULT: None

max_tokens

Maximum tokens to generate.

TYPE: int | None DEFAULT: None

**kwargs

Additional options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
ModelResponse

ModelResponse with generation results.

RAISES DESCRIPTION
ModelBackendError

On API error.

RateLimitError

On rate limit.

AuthenticationError

On auth failure.

Source code in src/mamba_agents/backends/openai_compat.py
async def complete(
    self,
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> ModelResponse:
    """Generate a (non-streaming) completion.

    Args:
        messages: Conversation messages.
        tools: Available tools.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.
        **kwargs: Additional options.

    Returns:
        ModelResponse with generation results.

    Raises:
        ModelBackendError: On API error.
        RateLimitError: On rate limit.
        AuthenticationError: On auth failure.
    """
    request_body = self._build_payload(
        messages=messages,
        tools=tools,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False,
        **kwargs,
    )

    try:
        resp = await self._client.post("/chat/completions", json=request_body)
        self._check_response(resp)
        return self._parse_response(resp.json())

    except httpx.HTTPStatusError as exc:
        # _handle_http_error maps status codes onto the backend's
        # exception hierarchy and always raises.
        self._handle_http_error(exc)
        raise  # Never reached, but satisfies type checker

    except httpx.RequestError as exc:
        # Transport-level failures (DNS, connect, read) are retryable.
        raise ModelBackendError(
            f"Request failed: {exc}",
            model=self._model,
            retryable=True,
            cause=exc,
        ) from exc

stream async

stream(
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> AsyncIterator[StreamChunk]

Generate a streaming completion.

PARAMETER DESCRIPTION
messages

Conversation messages.

TYPE: list[dict[str, Any]]

tools

Available tools.

TYPE: list[dict[str, Any]] | None DEFAULT: None

temperature

Sampling temperature.

TYPE: float | None DEFAULT: None

max_tokens

Maximum tokens to generate.

TYPE: int | None DEFAULT: None

**kwargs

Additional options.

TYPE: Any DEFAULT: {}

YIELDS DESCRIPTION
AsyncIterator[StreamChunk]

StreamChunk objects with partial content.

Source code in src/mamba_agents/backends/openai_compat.py
async def stream(
    self,
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    **kwargs: Any,
) -> AsyncIterator[StreamChunk]:
    """Generate a streaming completion via server-sent events.

    Args:
        messages: Conversation messages.
        tools: Available tools.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.
        **kwargs: Additional options.

    Yields:
        StreamChunk objects with partial content; a chunk with
        ``is_final=True`` is yielded when the server sends ``[DONE]``.

    Raises:
        ModelBackendError: On API error or transport failure.
        RateLimitError: On rate limit.
        AuthenticationError: On auth failure.
    """
    payload = self._build_payload(
        messages=messages,
        tools=tools,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        **kwargs,
    )

    try:
        async with self._client.stream("POST", "/chat/completions", json=payload) as response:
            self._check_response(response)

            async for line in response.aiter_lines():
                # Skip SSE keep-alives (blank lines and ":" comment lines).
                if not line or line.startswith(":"):
                    continue

                if line.startswith("data: "):
                    data_str = line[6:]
                    # "[DONE]" is the OpenAI-format end-of-stream sentinel.
                    if data_str.strip() == "[DONE]":
                        yield StreamChunk(is_final=True)
                        break

                    try:
                        data = json.loads(data_str)
                        chunk = self._parse_stream_chunk(data)
                        if chunk:
                            yield chunk
                    except json.JSONDecodeError:
                        # Tolerate malformed chunks rather than aborting the stream.
                        logger.warning("Failed to parse stream chunk: %s", line)

    except httpx.HTTPStatusError as e:
        self._handle_http_error(e)
        # Consistency/safety fix: complete() re-raises after the handler;
        # without this, a non-raising handler would silently end the
        # generator as if the stream had finished cleanly.
        raise  # Never reached, but satisfies type checker

    except httpx.RequestError as e:
        raise ModelBackendError(
            f"Stream request failed: {e}",
            model=self._model,
            retryable=True,
            cause=e,
        ) from e

health_check async

health_check() -> bool

Check if the backend is healthy.

RETURNS DESCRIPTION
bool

True if reachable.

Source code in src/mamba_agents/backends/openai_compat.py
async def health_check(self) -> bool:
    """Probe the server's model-listing endpoint to see if it is reachable.

    Returns:
        True if reachable.
    """
    try:
        # /models is the common OpenAI-compatible discovery endpoint.
        resp = await self._client.get("/models")
    except httpx.RequestError:
        # Transport failure: server is down or unreachable.
        return False
    # An auth error (401/403) still proves the server itself is up.
    return resp.status_code in (200, 401, 403)

close async

close() -> None

Close the HTTP client.

Source code in src/mamba_agents/backends/openai_compat.py
async def close(self) -> None:
    """Release the underlying httpx client and its connection pool."""
    client = self._client
    await client.aclose()

create_ollama_backend

create_ollama_backend(
    model: str,
    *,
    base_url: str = "http://localhost:11434/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for Ollama.

PARAMETER DESCRIPTION
model

Model name (e.g., "llama3.2", "mistral").

TYPE: str

base_url

Ollama API URL.

TYPE: str DEFAULT: 'http://localhost:11434/v1'

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_ollama_backend(
    model: str,
    *,
    base_url: str = "http://localhost:11434/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a local Ollama server.

    Args:
        model: Model name (e.g., "llama3.2", "mistral").
        base_url: Ollama API URL.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    # Ollama does not require authentication by default, so no key is set.
    return OpenAICompatibleBackend(model=model, base_url=base_url, api_key=None, **kwargs)

create_vllm_backend

create_vllm_backend(
    model: str,
    *,
    base_url: str = "http://localhost:8000/v1",
    api_key: str | None = None,
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for vLLM.

PARAMETER DESCRIPTION
model

Model name.

TYPE: str

base_url

vLLM API URL.

TYPE: str DEFAULT: 'http://localhost:8000/v1'

api_key

API key if required.

TYPE: str | None DEFAULT: None

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_vllm_backend(
    model: str,
    *,
    base_url: str = "http://localhost:8000/v1",
    api_key: str | None = None,
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a vLLM server.

    Args:
        model: Model name.
        base_url: vLLM API URL.
        api_key: API key if required.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    return OpenAICompatibleBackend(
        model=model,
        base_url=base_url,
        api_key=api_key,
        **kwargs,
    )

create_lmstudio_backend

create_lmstudio_backend(
    model: str = "local-model",
    *,
    base_url: str = "http://localhost:1234/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend

Create a backend configured for LM Studio.

PARAMETER DESCRIPTION
model

Model identifier (can be any name).

TYPE: str DEFAULT: 'local-model'

base_url

LM Studio API URL.

TYPE: str DEFAULT: 'http://localhost:1234/v1'

**kwargs

Additional backend options.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
OpenAICompatibleBackend

Configured OpenAICompatibleBackend.

Source code in src/mamba_agents/backends/openai_compat.py
def create_lmstudio_backend(
    model: str = "local-model",
    *,
    base_url: str = "http://localhost:1234/v1",
    **kwargs: Any,
) -> OpenAICompatibleBackend:
    """Build a backend pointed at a local LM Studio server.

    Args:
        model: Model identifier (can be any name).
        base_url: LM Studio API URL.
        **kwargs: Additional backend options.

    Returns:
        Configured OpenAICompatibleBackend.
    """
    # LM Studio's local server ignores the model name and needs no auth.
    return OpenAICompatibleBackend(model=model, base_url=base_url, api_key=None, **kwargs)