# OpenAPI 3.0 description of the LowRouter HTTP API.
openapi: "3.0.0"
info:
  title: "LowRouter API"
  version: "1.0.0"
  description: |
    OpenRouter-compatible API gateway for sustainable AI inference.
    Routes LLM requests to the most carbon-efficient provider while maintaining
    full compatibility with OpenAI and OpenRouter client libraries.
  contact:
    name: "LowRouter Support"
    url: "https://github.com/carbonifer/lowrouter"

# Base URL against which every path in this document is resolved.
servers:
  - description: "API v1 endpoint"
    url: "/api/v1"

# Default security requirement: the bearerAuth scheme, with no scopes.
security:
  - bearerAuth: []

paths:
  # POST /chat/completions: chat completion returned either as a single JSON
  # document or as a Server-Sent Events stream.
  /chat/completions:
    post:
      summary: Create chat completion
      description: |
        Creates a chat completion with automatic routing to the most carbon-efficient provider.
        Fully compatible with OpenAI's chat completions API.
      operationId: createChatCompletion
      tags:
        - Completions
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
            examples:
              autoMode:
                summary: Auto routing mode
                value:
                  messages:
                    - role: user
                      content: "What is the capital of France?"
                  model: lowrouter/auto
                  stream: false
              explicitModel:
                summary: Explicit model selection
                value:
                  messages:
                    - role: user
                      content: "Write a short poem"
                  model: openai/gpt-4
                  stream: false
              withUser:
                summary: With user identifier for routing consistency
                value:
                  messages:
                    - role: user
                      content: "Continue our conversation"
                  model: lowrouter/auto
                  user: user-123
      responses:
        '200':
          description: Successful completion
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            # OpenAPI 3.0 cannot model an event stream structurally, so the
            # payload of each event is documented in prose instead.
            text/event-stream:
              schema:
                type: string
                description: >-
                  Server-Sent Events stream, returned when `stream` is true.
                  Each `data:` event carries a JSON-encoded ChatCompletionChunk
                  object; the stream ends with a final `data: [DONE]` event.
        # Error responses, in ascending status-code order.
        '400':
          $ref: '#/components/responses/BadRequestError'
        '401':
          $ref: '#/components/responses/UnauthorizedError'
        '500':
          $ref: '#/components/responses/InternalServerError'

  # POST /completions: legacy text-completion operation.
  /completions:
    post:
      operationId: createCompletion
      tags: [Completions]
      summary: "Create text completion"
      description: |
        Creates a text completion (legacy endpoint for compatibility).
        Routes to providers supporting text completion format.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CompletionRequest"
            examples:
              simple:
                summary: "Simple completion"
                value:
                  prompt: 'Once upon a time'
                  model: "openai/gpt-3.5-turbo-instruct"
                  max_tokens: 100
      responses:
        "200":
          description: "Successful completion"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CompletionResponse"
            text/event-stream:
              schema:
                description: "Server-Sent Events stream"
                type: string
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "400":
          $ref: "#/components/responses/BadRequestError"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # POST /embeddings: embedding-vector creation.
  /embeddings:
    post:
      operationId: createEmbedding
      tags: [Embeddings]
      summary: "Create embeddings"
      description: |
        Creates an embedding vector representing the input text.
        Routes to providers supporting embeddings via Bifrost.
        Applies billing (input tokens only) and carbon tracking.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/EmbeddingRequest"
            examples:
              simple_string:
                summary: "Single string input"
                value:
                  model: "openai/text-embedding-3-small"
                  input: 'The quick brown fox'
              array_input:
                summary: "Array of strings"
                value:
                  model: "openai/text-embedding-3-small"
                  input:
                    - 'Hello world'
                    - 'Goodbye world'
      responses:
        "200":
          description: "Embedding created successfully"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/EmbeddingResponse"
        "400":
          $ref: "#/components/responses/BadRequestError"
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        # Unlike the other error responses here, 402 is declared inline.
        "402":
          description: "Insufficient credits"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # GET /models: catalogue of available models.
  /models:
    get:
      operationId: listModels
      tags: [Models]
      summary: "List available models"
      description: |
        Returns a list of all available models with their capabilities,
        pricing, and carbon intensity metrics.
      responses:
        "200":
          description: "List of models"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ModelsListResponse"
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # GET /models/{model}: details of one model.
  # NOTE(review): the description allows slashes inside `model`, but an
  # OpenAPI 3.0 path parameter does not span '/' segments. Presumably clients
  # have to percent-encode the id - confirm against the server's router.
  /models/{model}:
    get:
      operationId: getModel
      tags: [Models]
      summary: "Retrieve a model"
      description: |
        Returns details for a single model matching the OpenAI retrieve model format.
        The model parameter may contain slashes (e.g. nebius/NousResearch/Hermes-4-70B).
      parameters:
        - in: path
          name: model
          required: true
          description: "The model ID to retrieve"
          example: "openai/gpt-4"
          schema:
            type: string
      responses:
        "200":
          description: "Model details"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ModelRetrieveResponse"
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "404":
          $ref: "#/components/responses/NotFoundError"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # GET /providers: list of configured providers.
  /providers:
    get:
      operationId: listProviders
      tags: [Providers]
      summary: "List available providers"
      description: |
        Returns a list of all configured providers with their status and regions.
        (NICE TO HAVE - may not be implemented in MVP)
      responses:
        "200":
          description: "List of providers"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ProvidersListResponse"
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # GET /generation/{generation_id}: statistics of one generation.
  /generation/{generation_id}:
    get:
      operationId: getGeneration
      tags: [Generations]
      summary: "Get generation statistics"
      description: |
        Retrieves detailed statistics for a specific generation including
        tokens, cost, carbon metrics, and latency.
        (NICE TO HAVE - may not be implemented in MVP)
      parameters:
        - in: path
          name: generation_id
          required: true
          description: "The generation ID returned in the completion response"
          schema:
            type: string
      responses:
        "200":
          description: "Generation statistics"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GenerationStats"
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "404":
          $ref: "#/components/responses/NotFoundError"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # GET /metrics/{generation_id}: carbon and energy metrics of one generation.
  /metrics/{generation_id}:
    get:
      operationId: getGenerationMetrics
      tags: [Metrics]
      summary: "Get generation metrics"
      description: |
        Retrieves carbon and energy metrics for a specific generation.
        This endpoint provides historical access to energy consumption and
        carbon emissions data for completed requests.
      parameters:
        - in: path
          name: generation_id
          required: true
          description: "The generation ID returned in the completion response"
          example: "chatcmpl-abc123"
          schema:
            type: string
      responses:
        "200":
          description: "Generation metrics retrieved successfully"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GenerationMetricsResponse"
              # Sample payload matching GenerationMetricsResponse.
              example:
                generation_id: "chatcmpl-abc123"
                provider: "openai"
                model: "gpt-4"
                prompt_tokens: 50
                completion_tokens: 100
                total_tokens: 150
                energy_joules: 0.042
                carbon_gco2e: 0.0000156
                request_duration_ms: 1250
                created_at: '2025-10-24T10:15:30Z'
        "401":
          $ref: "#/components/responses/UnauthorizedError"
        "404":
          $ref: "#/components/responses/NotFoundError"
        "500":
          $ref: "#/components/responses/InternalServerError"

components:
  # Authentication schemes; `bearerAuth` is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      description: "API key in format sk-or-v1-{random_alphanumeric}"
      type: http
      scheme: bearer
      bearerFormat: "sk-or-v1-{token}"

  schemas:
    # Request body for embedding creation; `model` and `input` are mandatory.
    EmbeddingRequest:
      type: object
      required:
        - model
        - input
      properties:
        model:
          type: string
          description: |
            Model to use for embeddings. Must be an explicit model ID
            (no auto-routing for embeddings).
          example: openai/text-embedding-3-small
        input:
          # Both array branches demand at least one item. Without that, an
          # empty array satisfies the string-array AND the integer-array
          # branch at once, so the `oneOf` alternatives would overlap.
          oneOf:
            - type: string
            - type: array
              minItems: 1
              items:
                type: string
            - type: array
              minItems: 1
              items:
                type: integer
          description: |
            Input text to embed. Can be a string, array of strings,
            or array of integers (token IDs).
        encoding_format:
          type: string
          enum: [float, base64]
          default: float
          description: The format for the embedding output.
        dimensions:
          type: integer
          description: |
            The number of dimensions to reduce the embedding to.
            Only supported by certain models.
        user:
          type: string
          description: A unique identifier for the end-user.

    # Response envelope of the embeddings operation.
    EmbeddingResponse:
      type: object
      properties:
        object:
          type: string
          enum:
            - list
        data:
          type: array
          items:
            $ref: "#/components/schemas/EmbeddingData"
        model:
          type: string
        usage:
          $ref: "#/components/schemas/EmbeddingUsage"
        lowrouter_metadata:
          $ref: "#/components/schemas/LowRouterMetadata"

    # One entry of EmbeddingResponse.data.
    EmbeddingData:
      type: object
      properties:
        object:
          type: string
          enum:
            - embedding
        embedding:
          # Either a numeric array or a plain string.
          oneOf:
            - type: array
              items:
                type: number
            - type: string
          description: "The embedding vector (float array or base64 string)."
        index:
          type: integer

    # Token counts and billing figures reported with an embedding response.
    EmbeddingUsage:
      type: object
      properties:
        prompt_tokens:
          type: integer
        total_tokens:
          type: integer
        cost:
          description: "Cost of the request, in the currency given by `currency`."
          type: number
          format: float
        remaining_balance:
          description: "Remaining account balance after this request, in the currency given by `currency`."
          type: number
          format: float
        currency:
          description: "ISO-4217 currency code for `cost` and `remaining_balance` (e.g. `EUR`)."
          type: string
          example: "EUR"

    # Request body for chat completions; only `messages` is mandatory.
    ChatCompletionRequest:
      type: object
      required: [messages]
      properties:
        model:
          type: string
          description: |
            Model to use for completion. Use 'lowrouter/auto' for automatic
            carbon-efficient routing, or specify exact model like 'openai/gpt-4'.
          default: "lowrouter/auto"
          example: "lowrouter/auto"
        messages:
          description: "Array of messages in the conversation"
          type: array
          minItems: 1
          items:
            $ref: "#/components/schemas/Message"
        temperature:
          description: "Sampling temperature between 0 and 2"
          type: number
          minimum: 0
          maximum: 2
          default: 1
        top_p:
          description: "Nucleus sampling parameter"
          type: number
          minimum: 0
          maximum: 1
          default: 1
        n:
          description: "Number of completions to generate"
          type: integer
          minimum: 1
          default: 1
        stream:
          description: "Whether to stream the response via SSE"
          type: boolean
          default: false
        stop:
          description: "Stop sequence(s)"
          # A single sequence or a list of sequences.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        max_tokens:
          description: "Maximum tokens to generate"
          type: integer
          minimum: 1
        presence_penalty:
          description: "Presence penalty parameter"
          type: number
          minimum: -2
          maximum: 2
          default: 0
        frequency_penalty:
          description: "Frequency penalty parameter"
          type: number
          minimum: -2
          maximum: 2
          default: 0
        user:
          type: string
          description: |
            Optional user identifier for routing consistency. When provided,
            the same user will be routed to the same provider for cache warmth.
          example: "user-123"
        tools:
          description: "A list of tools the model may call."
          type: array
          items:
            $ref: "#/components/schemas/Tool"
        tool_choice:
          # Either one of the keyword strings or an object naming a function.
          oneOf:
            - type: string
              enum:
                - none
                - auto
                - required
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - function
                function:
                  type: object
                  required: [name]
                  properties:
                    name:
                      type: string
          description: |
            Controls which (if any) tool is called by the model.
            "none" means the model will not call any tool.
            "auto" means the model can pick between generating a message or calling one or more tools.
            "required" means the model must call one or more tools.
            Can also be an object specifying a particular function to call.
        parallel_tool_calls:
          description: "Whether to enable parallel function calling during tool use."
          type: boolean
          default: true

    # A conversation message. Used for the items of a chat request's
    # `messages` array and for the `message` of a response choice.
    Message:
      type: object
      # `content` is deliberately absent from this list and is nullable
      # below: per its description it is not required for assistant messages
      # that carry `tool_calls`, so the schema must accept those messages.
      required:
        - role
      properties:
        role:
          type: string
          enum: [system, user, assistant, function, tool]
          description: Role of the message sender
        content:
          type: string
          nullable: true
          description: Content of the message. Required for all roles except assistant with tool_calls.
        name:
          type: string
          description: Optional name of the sender
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
          description: The tool calls generated by the model, such as function calls. Present in assistant messages.
        tool_call_id:
          type: string
          description: Tool call that this message is responding to. Required for tool role messages.

    # JSON body of a successful (non-streamed) chat completion.
    ChatCompletionResponse:
      type: object
      required: [id, object, created, model, choices]
      properties:
        id:
          description: "Unique completion identifier"
          type: string
          example: "chatcmpl-abc123"
        object:
          description: "Object type"
          type: string
          enum:
            - chat.completion
        created:
          description: "Unix timestamp of creation"
          type: integer
          example: 1234567890
        model:
          description: "Model used for completion"
          type: string
          example: "openai/gpt-4"
        choices:
          description: "Array of completion choices"
          type: array
          items:
            $ref: "#/components/schemas/ChatCompletionChoice"
        usage:
          $ref: "#/components/schemas/Usage"
        lowrouter_metadata:
          $ref: "#/components/schemas/LowRouterMetadata"

    # One entry of ChatCompletionResponse.choices.
    ChatCompletionChoice:
      type: object
      required: [index, message, finish_reason]
      properties:
        index:
          description: "Index of the choice"
          type: integer
        message:
          $ref: "#/components/schemas/Message"
        finish_reason:
          description: "Reason for completion finish"
          type: string
          # The key is required, but its value may be null.
          nullable: true
          enum:
            - stop
            - length
            - content_filter
            - tool_calls
            - null

    # One chunk of a streamed chat completion.
    ChatCompletionChunk:
      type: object
      required:
        - id
        - object
        - created
        - model
        - choices
      properties:
        id:
          type: string
          description: Unique completion identifier
        object:
          type: string
          enum: [chat.completion.chunk]
          description: Object type for streaming
        created:
          type: integer
          description: Unix timestamp of creation
        model:
          type: string
          description: Model used for completion
        choices:
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionChunkChoice'
        lowrouter_metadata:
          # OpenAPI 3.0 ignores every keyword that sits next to `$ref`, so
          # the reference is wrapped in `allOf` to keep the description.
          description: 'Only present in final chunk before [DONE]'
          allOf:
            - $ref: '#/components/schemas/LowRouterMetadata'

    # One entry of ChatCompletionChunk.choices.
    ChatCompletionChunkChoice:
      type: object
      required: [index, delta]
      properties:
        index:
          description: "Index of the choice"
          type: integer
        delta:
          $ref: "#/components/schemas/Delta"
        finish_reason:
          description: "Present only in final chunk"
          type: string
          nullable: true
          enum:
            - stop
            - length
            - content_filter
            - tool_calls
            - null

    # Incremental message payload carried by a streamed chunk choice.
    Delta:
      type: object
      properties:
        role:
          description: "Present only in first chunk"
          type: string
          enum:
            - system
            - user
            - assistant
        content:
          description: "Incremental content"
          type: string
        tool_calls:
          description: "Tool calls in streaming responses"
          type: array
          items:
            $ref: "#/components/schemas/DeltaToolCall"

    # A tool offered to the model through the request's `tools` list.
    Tool:
      type: object
      required: [type, function]
      properties:
        type:
          description: "The type of the tool. Currently, only function is supported."
          type: string
          enum:
            - function
        function:
          $ref: "#/components/schemas/ToolFunction"

    # Function declaration inside a Tool; only `name` is mandatory.
    ToolFunction:
      type: object
      required: [name]
      properties:
        name:
          description: "The name of the function to be called."
          type: string
        description:
          description: "A description of what the function does."
          type: string
        parameters:
          description: "The parameters the function accepts, described as a JSON Schema object."
          type: object

    # A complete tool call as it appears in Message.tool_calls.
    ToolCall:
      type: object
      required: [id, type, function]
      properties:
        id:
          description: "The ID of the tool call."
          type: string
        type:
          description: "The type of the tool. Currently, only function is supported."
          type: string
          enum:
            - function
        function:
          $ref: "#/components/schemas/ToolCallFunction"

    # Function name and serialized arguments of a ToolCall.
    ToolCallFunction:
      type: object
      required: [name, arguments]
      properties:
        name:
          description: "The name of the function to call."
          type: string
        arguments:
          description: "The arguments to call the function with, as a JSON string."
          type: string

    # Streaming counterpart of ToolCall; only `index` is mandatory.
    DeltaToolCall:
      type: object
      required: [index]
      properties:
        index:
          description: "The index of the tool call in the tool_calls array."
          type: integer
        id:
          description: "The ID of the tool call."
          type: string
        type:
          description: "The type of the tool."
          type: string
          enum:
            - function
        function:
          $ref: "#/components/schemas/DeltaToolCallFunction"

    # Function fragment of a DeltaToolCall; both fields are optional.
    DeltaToolCallFunction:
      type: object
      properties:
        name:
          description: "The name of the function to call (may be partial in streaming)."
          type: string
        arguments:
          description: "The arguments fragment (appended incrementally in streaming)."
          type: string

    # Request body for the legacy text-completion operation; only `prompt`
    # is mandatory.
    CompletionRequest:
      type: object
      required:
        - prompt
      properties:
        model:
          type: string
          default: lowrouter/auto
          description: Model to use for completion
        prompt:
          type: string
          description: The prompt to generate completion for
        max_tokens:
          type: integer
          minimum: 1
          default: 16
          description: Maximum tokens to generate
        temperature:
          type: number
          minimum: 0
          maximum: 2
          default: 1
          description: Sampling temperature
        top_p:
          type: number
          minimum: 0
          maximum: 1
          default: 1
          description: Nucleus sampling parameter
        n:
          type: integer
          minimum: 1
          default: 1
          description: Number of completions to generate
        stream:
          type: boolean
          default: false
          description: Whether to stream the response
        stop:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
          description: Stop sequence(s)
        # The two penalty fields carry the same descriptions as their
        # counterparts in ChatCompletionRequest.
        presence_penalty:
          type: number
          minimum: -2
          maximum: 2
          default: 0
          description: Presence penalty parameter
        frequency_penalty:
          type: number
          minimum: -2
          maximum: 2
          default: 0
          description: Frequency penalty parameter
        user:
          type: string
          description: Optional user identifier for routing consistency

    # JSON body of a successful text completion.
    CompletionResponse:
      type: object
      required: [id, object, created, model, choices]
      properties:
        id:
          description: "Unique completion identifier"
          type: string
          example: "cmpl-abc123"
        object:
          description: "Object type"
          type: string
          enum:
            - text_completion
        created:
          description: "Unix timestamp of creation"
          type: integer
        model:
          description: "Model used for completion"
          type: string
        choices:
          type: array
          items:
            $ref: "#/components/schemas/CompletionChoice"
        usage:
          $ref: "#/components/schemas/Usage"
        lowrouter_metadata:
          $ref: "#/components/schemas/LowRouterMetadata"

    # One entry of CompletionResponse.choices.
    CompletionChoice:
      type: object
      required: [index, text, finish_reason]
      properties:
        index:
          description: "Index of the choice"
          type: integer
        text:
          description: "Completion text"
          type: string
        finish_reason:
          description: "Reason for completion finish"
          type: string
          enum:
            - stop
            - length
            - content_filter

    # Token accounting plus optional billing figures for a completion.
    Usage:
      type: object
      required: [prompt_tokens, completion_tokens, total_tokens]
      properties:
        prompt_tokens:
          description: "Number of tokens in the prompt"
          type: integer
          example: 10
        completion_tokens:
          description: "Number of tokens in the completion"
          type: integer
          example: 20
        total_tokens:
          description: "Total tokens used"
          type: integer
          example: 30
        cost:
          description: "Cost of this request, in the currency given by `currency`."
          type: number
          format: float
          example: 0.0042
        remaining_balance:
          description: "Remaining account balance after this request, in the currency given by `currency`."
          type: number
          format: float
          example: 4.95
        currency:
          description: "ISO-4217 currency code for `cost` and `remaining_balance` (e.g. `EUR`)."
          type: string
          example: "EUR"

    # Routing, energy and carbon details attached to LowRouter responses.
    LowRouterMetadata:
      type: object
      required: [provider, routing_mode, fallback_occurred]
      properties:
        provider:
          description: "Provider that handled the request"
          type: string
          example: "openai"
        region:
          description: "Region where the request was processed"
          type: string
          example: "us-east-1"
        energy_joules:
          description: "Energy consumed in joules"
          type: number
          format: float
          example: 145.2
        carbon_gco2e:
          description: "Carbon emissions in grams of CO2 equivalent"
          type: number
          format: float
          example: 23.4
        estimation_methodology:
          description: "Methodology used for carbon/energy estimation"
          type: string
          example: "TDP-based calculation with regional grid intensity"
        carbon_intensity_gco2_per_kwh:
          description: "Grid carbon intensity in gCO2/kWh"
          type: integer
          example: 460
        routing_mode:
          description: "Routing mode used for this request"
          type: string
          enum:
            - auto
            - explicit
        routing_reason:
          description: "Reason for provider selection"
          type: string
          enum: [lowest_carbon_intensity, lowest_cost, lowest_latency, round_robin]
          example: "lowest_carbon_intensity"
        fallback_occurred:
          description: "Whether fallback to another provider occurred"
          type: boolean
        providers_attempted:
          description: "List of providers attempted (for fallback scenarios)"
          type: array
          items:
            type: string
          example:
            - openai
            - anthropic

    # Body of the generation-metrics operation. The id, the three token
    # counts and `created_at` are mandatory; the other fields are nullable.
    GenerationMetricsResponse:
      type: object
      required: [generation_id, prompt_tokens, completion_tokens, total_tokens, created_at]
      properties:
        generation_id:
          description: "Unique generation identifier"
          type: string
          example: "chatcmpl-abc123"
        provider:
          description: "Provider that handled the request"
          type: string
          nullable: true
          example: "openai"
        model:
          description: "Model used for completion"
          type: string
          nullable: true
          example: "gpt-4"
        prompt_tokens:
          description: "Number of tokens in the prompt"
          type: integer
          example: 50
        completion_tokens:
          description: "Number of tokens in the completion"
          type: integer
          example: 100
        total_tokens:
          description: "Total tokens used"
          type: integer
          example: 150
        energy_joules:
          description: "Energy consumed in joules (nullable if emissions data unavailable)"
          type: number
          format: float
          nullable: true
          example: 0.042
        carbon_gco2e:
          description: "Carbon emissions in grams CO2 equivalent (nullable if emissions data unavailable)"
          type: number
          format: float
          nullable: true
          example: 0.0000156
        request_duration_ms:
          description: "Request duration in milliseconds"
          type: integer
          nullable: true
          example: 1250
        created_at:
          description: "Timestamp when the generation was created"
          type: string
          format: date-time
          example: '2025-10-24T10:15:30Z'

    # Body of the list-models operation: a `list` object wrapping ModelInfo
    # entries.
    ModelsListResponse:
      type: object
      required: [data]
      properties:
        object:
          type: string
          default: "list"
          enum:
            - list
        data:
          type: array
          items:
            $ref: "#/components/schemas/ModelInfo"

    # ModelInfo combined with three additional mandatory fields
    # (`object`, `created`, `owned_by`).
    ModelRetrieveResponse:
      allOf:
        - $ref: "#/components/schemas/ModelInfo"
        - type: object
          required: [object, created, owned_by]
          properties:
            object:
              type: string
              default: "model"
              enum:
                - model
            created:
              description: "Unix timestamp when the model was created"
              type: integer
              format: int64
              example: 1686935002
            owned_by:
              description: "The organization that owns the model"
              type: string
              example: "openai"

    # Catalogue entry for one model, including its per-region variants.
    ModelInfo:
      type: object
      required: [id, name, provider]
      properties:
        id:
          type: string
          description: >-
            Canonical routable model id in the 3-part form
            `{provider}/{creator}/{model}`. Passing this to the completions
            `model` field defaults to the `global` region. To pin a region, use
            one of the 4-part ids in `regions[]` instead.
          example: "vertex/anthropic/claude-opus-4.6"
        name:
          description: "Display name of the model"
          type: string
          example: "vertex/anthropic/claude-opus-4.6"
        provider:
          description: "Provider name"
          type: string
          example: "vertex"
        context_length:
          description: "Maximum context length in tokens"
          type: integer
          example: 8192
        capabilities:
          type: object
          required: [streaming]
          properties:
            streaming:
              description: "Supports streaming responses"
              type: boolean
            function_calling:
              description: "Supports function calling"
              type: boolean
        regions:
          type: array
          description: >-
            Routable per-region variants of this model. Pricing and carbon vary
            by region, so they live here rather than at the model level. `global`
            is always the first entry when present. When a model has no regional
            rows, a single synthesized `global` entry carries the model's pricing.
            Use one of these `id`s (or the 3-part `id` above) as the completions
            `model`.
          items:
            $ref: "#/components/schemas/ModelRegion"
      # Sample entry with a `global` variant and one pinned region.
      example:
        id: "vertex/anthropic/claude-opus-4.6"
        name: "vertex/anthropic/claude-opus-4.6"
        provider: "vertex"
        context_length: 200000
        capabilities:
          streaming: true
          function_calling: true
        regions:
          - id: "vertex/anthropic/claude-opus-4.6/global"
            locode: "global"
            pricing:
              prompt_per_1m_tokens: 5.0
              completion_per_1m_tokens: 25.0
              currency: "EUR"
            carbon_metrics:
              GLOBAL-AVERAGE:
                energy_per_token_joules: 0.00012
                carbon_per_token_gco2e: 0.000055
                grid_carbon_intensity_gco2_per_kwh: 475
          - id: "vertex/anthropic/claude-opus-4.6/sg-sin"
            locode: "sg-sin"
            pricing:
              prompt_per_1m_tokens: 5.5
              completion_per_1m_tokens: 27.5
              currency: "EUR"
            carbon_metrics:
              sg-sin:
                energy_per_token_joules: 0.00012
                carbon_per_token_gco2e: 0.00006
                grid_carbon_intensity_gco2_per_kwh: 495

    # ModelRegion: one routable, region-pinned variant of a model, together
    # with the pricing and carbon figures that apply to that region.
    ModelRegion:
      type: object
      required:
        - id
        - locode
      properties:
        id:
          type: string
          description: >-
            Full routable 4-part id `{provider}/{creator}/{model}/{locode}`.
            Pass this to the completions `model` field to pin this region and be
            billed at its rate.
          example: vertex/anthropic/claude-opus-4.6/sg-sin
        locode:
          type: string
          description: >-
            UN/LOCODE naming the region (e.g. `sg-sin`), or `global` for the
            provider's default endpoint.
          example: sg-sin
        # No pricing field is required; the cache and batch prices are left
        # out when the model has no such pricing (see their descriptions).
        pricing:
          type: object
          properties:
            prompt_per_1m_tokens:
              type: number
              format: float
              description: Price per 1M prompt tokens, in the currency given by `currency`.
              example: 30.0
            completion_per_1m_tokens:
              type: number
              format: float
              description: Price per 1M completion tokens, in the currency given by `currency`.
              example: 60.0
            cache_read_per_1m_tokens:
              type: number
              format: float
              description: >-
                Price per 1M cached prompt tokens read from the provider cache,
                in the currency given by `currency`. Omitted when the model has
                no cache pricing.
              example: 0.09
            cache_write_per_1m_tokens:
              type: number
              format: float
              description: >-
                Price per 1M prompt tokens written to the provider cache, in the
                currency given by `currency`. Omitted when the model has no cache
                pricing.
              example: 1.11
            batch_prompt_per_1m_tokens:
              type: number
              format: float
              description: >-
                Price per 1M prompt tokens for batch requests, in the currency
                given by `currency`. Omitted when the model has no batch pricing.
              example: 15.0
            batch_completion_per_1m_tokens:
              type: number
              format: float
              description: >-
                Price per 1M completion tokens for batch requests, in the
                currency given by `currency`. Omitted when the model has no batch
                pricing.
              example: 30.0
            currency:
              type: string
              description: ISO-4217 currency code for the prices above (e.g. `EUR`).
              example: EUR
        # Map of region label -> metrics object (hence `additionalProperties`).
        carbon_metrics:
          type: object
          description: >-
            Carbon intensity for this region, keyed by region label
            (`GLOBAL-AVERAGE` for the synthesized global entry).
          additionalProperties:
            type: object
            properties:
              energy_per_token_joules:
                type: number
                format: float
                description: Energy per token in joules
              carbon_per_token_gco2e:
                type: number
                format: float
                description: Carbon per token in gCO2e
              # Unit is carried by the property name: gCO2 per kWh.
              grid_carbon_intensity_gco2_per_kwh:
                type: integer
                description: Grid carbon intensity
          # Keyed by the same region label as the `id` / `locode` examples of
          # this schema, so the sample payload is self-consistent.
          example:
            sg-sin:
              energy_per_token_joules: 0.00012
              carbon_per_token_gco2e: 0.00006
              grid_carbon_intensity_gco2_per_kwh: 495

    # ProvidersListResponse: list envelope whose `data` array holds
    # ProviderInfo items. `object` can only be the constant `list`.
    ProvidersListResponse:
      type: object
      required: [data]
      properties:
        object:
          type: string
          enum:
            - list
          default: list
        data:
          type: array
          items:
            $ref: '#/components/schemas/ProviderInfo'

    # ProviderInfo: one provider with its health status. `regions` and
    # `model_count` are optional; `name` and `status` are always present.
    ProviderInfo:
      type: object
      required: [name, status]
      properties:
        name:
          type: string
          description: Provider name
          example: openai
        status:
          type: string
          enum:
            - healthy
            - degraded
            - down
          description: Current provider health status
        regions:
          type: array
          items:
            type: string
          description: Supported regions
          example:
            - us-east-1
            - us-west-2
            - eu-west-1
        model_count:
          type: integer
          description: Number of models offered by this provider

    # GenerationStats: record describing a single generation. The fields under
    # `required` are always present; `cost`, `latency` and `routing_info` are
    # optional objects.
    GenerationStats:
      type: object
      required:
        - generation_id
        - model
        - provider
        - region
        - usage
        - carbon_metrics
        - created_at
      properties:
        generation_id:
          type: string
          description: Unique generation identifier
        model:
          type: string
          description: Model used
        provider:
          type: string
          description: Provider that handled the request
        region:
          type: string
          description: Region where processed
        # Token accounting is defined once in the shared Usage schema.
        usage:
          $ref: '#/components/schemas/Usage'
        # Monetary amounts; `currency` names the unit for all three values.
        cost:
          type: object
          properties:
            prompt_cost:
              type: number
              format: float
            completion_cost:
              type: number
              format: float
            total_cost:
              type: number
              format: float
            currency:
              type: string
              description: ISO-4217 currency code for the cost values above (e.g. `EUR`).
              example: EUR
        # Units are carried by the property names (joules, gCO2e, gCO2/kWh).
        carbon_metrics:
          type: object
          properties:
            energy_joules:
              type: number
              format: float
            carbon_gco2e:
              type: number
              format: float
            grid_carbon_intensity_gco2_per_kwh:
              type: integer
        # Durations in milliseconds; time-to-first-token may be null.
        latency:
          type: object
          properties:
            request_duration_ms:
              type: integer
            time_to_first_token_ms:
              type: integer
              nullable: true
        routing_info:
          type: object
          properties:
            routing_mode:
              type: string
              enum:
                - auto
                - explicit
            routing_reason:
              type: string
            fallback_occurred:
              type: boolean
            providers_attempted:
              type: array
              items:
                type: string
        created_at:
          type: string
          format: date-time
          description: Timestamp of generation

    # ErrorResponse: top-level error envelope; the details live in the
    # mandatory `error` member.
    ErrorResponse:
      type: object
      required: [error]
      properties:
        error:
          $ref: '#/components/schemas/ErrorDetail'

    # ErrorDetail: body of an error. Only `message` is mandatory; every other
    # member may be absent, and `code` / `param` may also be explicit nulls.
    ErrorDetail:
      type: object
      required: [message]
      properties:
        type:
          type: string
          enum:
            - invalid_request_error
            - authentication_error
            - rate_limit_error
            - provider_error
            - provider_unavailable
            - internal_server_error
            - not_found
          description: Error type matching OpenAI error format
        message:
          type: string
          description: Human-readable error message
        code:
          type: string
          description: Machine-readable error code
          nullable: true
        param:
          type: string
          description: Parameter that caused the error (if applicable)
          nullable: true
        # The two members below carry fallback context.
        providers_attempted:
          type: array
          items:
            type: string
          description: Providers attempted before failure (fallback context)
        last_error:
          type: string
          description: Last error message from provider (fallback context)

  # Reusable error responses. Each entry returns the shared ErrorResponse
  # schema as JSON and differs only in its description and sample payload.
  responses:
    # Sample payload uses error type `authentication_error`.
    UnauthorizedError:
      description: Authentication failed
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              type: authentication_error
              message: "Invalid API key provided"
              code: invalid_api_key

    # Sample payload names the offending field through `param`.
    BadRequestError:
      description: Invalid request
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              type: invalid_request_error
              message: "Missing required field 'messages'"
              code: missing_required_field
              param: messages

    # Sample payload carries an explicit null `code` (the schema marks
    # `code` as nullable).
    NotFoundError:
      description: Resource not found
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              type: not_found
              message: "Generation not found"
              code: null
              param: generation_id

    # Sample payload uses error type `internal_server_error`.
    InternalServerError:
      description: Internal server error
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              type: internal_server_error
              message: "An unexpected error occurred"
              code: internal_error

# Tag catalogue for the API. Operations attach themselves to a tag by listing
# its `name` under their own `tags:` key (e.g. `Completions` on the chat
# completions operation).
tags:
  - name: Completions
    description: Chat and text completion endpoints
  - name: Models
    description: Model listing and information
  - name: Providers
    description: Provider status and information
  - name: Generations
    description: Generation statistics and history
  - name: Metrics
    description: Carbon and energy metrics for generations