# OpenAI Compatible Endpoints

Endpoints compatible with OpenAI's API specification

## Create Chat Completions

> Create Chat Completions by calling an available model in a format that is compatible with the OpenAI API.\
> Supports both text-only and multimodal (text + images) inputs for compatible models.\
> Rate limits apply per contract. Default limits apply unless a custom rate limit is configured for your contract.\
> Exceeding the limit returns HTTP 429 with a Retry-After header.<br>

```json
{"openapi":"3.0.3","info":{"title":"IONOS Cloud - OpenAI compatible AI Model Hub API","version":"1.0.0"},"tags":[{"name":"OpenAI Compatible Endpoints","description":"Endpoints compatible with OpenAI's API specification"}],"servers":[{"url":"https://openai.inference.de-txl.ionos.com","description":"Berlin"}],"security":[{"tokenAuth":[]}],"components":{"securitySchemes":{"tokenAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Please provide header value as 'Bearer <token>' and don't forget to add 'Bearer' HTTP Authorization Scheme before the token."}},"schemas":{"ChatCompletionRequest":{"type":"object","properties":{"model":{"type":"string","description":"ID of the model to use"},"messages":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionMessage"}},"response_format":{"description":"An object specifying the format that the model must output. Use json_object for JSON mode or json_schema to enforce a specific schema (Structured Outputs). If omitted, default text output is used.\n","oneOf":[{"$ref":"#/components/schemas/ResponseFormatJSONSchema"},{"$ref":"#/components/schemas/ResponseFormatJSONObject"},{"$ref":"#/components/schemas/ResponseFormatText"}]},"temperature":{"type":"number","description":"The sampling temperature to be used","default":1},"top_p":{"type":"number","description":"An alternative to sampling with temperature","default":1},"n":{"type":"integer","description":"The number of chat completion choices to generate for each input message","default":1},"stream":{"default":false,"type":"boolean","description":"If set to true, it sends partial message deltas"},"stop":{"type":"array","items":{"type":"string"},"description":"Up to 4 sequences where the API will stop generating further tokens"},"max_tokens":{"type":"integer","description":"The maximum number of tokens to generate in the chat completion. This value is now deprecated in favor of max_completion_tokens","default":16,"deprecated":true},"max_completion_tokens":{"type":"integer","description":"An upper bound for the number of tokens that can be generated for a completion, including visible output tokens","default":16},"presence_penalty":{"type":"number","description":"It is used to penalize new tokens based on their existence in the text so far","default":0},"frequency_penalty":{"type":"number","description":"It is used to penalize new tokens based on their frequency in the text so far","default":0},"logit_bias":{"type":"object","description":"Used to modify the probability of specific tokens appearing in the completion"},"user":{"type":"string","description":"A unique identifier representing your end-user"},"tools":{"type":"array","description":"A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.\n","items":{"$ref":"#/components/schemas/chatCompletionTool"}},"tool_choice":{"$ref":"#/components/schemas/chatCompletionToolChoiceOption"}},"required":["model","messages"]},"ChatCompletionMessage":{"type":"object","description":"A message in a chat completion request, supporting both text-only and multimodal content","properties":{"role":{"type":"string","description":"The role of the message's author","enum":["system","user","assistant","tool"]},"content":{"oneOf":[{"type":"string","description":"Text content of the message (legacy format)"},{"type":"array","description":"Array of content parts for multimodal messages","items":{"$ref":"#/components/schemas/ChatCompletionContentPart"}}]},"name":{"type":"string","description":"The name of the author of the message"},"tool_calls":{"type":"array","description":"Tool calls generated by the model","items":{"$ref":"#/components/schemas/chatCompletionMessageToolCall"}},"tool_call_id":{"type":"string","description":"Tool call that this message is responding to"}},"required":["role","content"]},"ChatCompletionContentPart":{"oneOf":[{"$ref":"#/components/schemas/ChatCompletionContentPartText"},{"$ref":"#/components/schemas/ChatCompletionContentPartImage"}],"discriminator":{"propertyName":"type","mapping":{"text":"#/components/schemas/ChatCompletionContentPartText","image_url":"#/components/schemas/ChatCompletionContentPartImage"}}},"ChatCompletionContentPartText":{"type":"object","description":"Text content part","properties":{"type":{"type":"string","enum":["text"],"description":"The type of the content part"},"text":{"type":"string","description":"The text content"}},"required":["type","text"]},"ChatCompletionContentPartImage":{"type":"object","description":"Image content part","properties":{"type":{"type":"string","enum":["image_url"],"description":"The type of the content part"},"image_url":{"$ref":"#/components/schemas/ChatCompletionContentPartImageImageUrl"}},"required":["type","image_url"]},"ChatCompletionContentPartImageImageUrl":{"type":"object","description":"Image URL or data","properties":{"url":{"type":"string","description":"Either a URL of the image or the base64 encoded image data.\nSupported formats:\n- URLs: https://example.com/image.jpg\n- Base64 data URLs: data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQ...\n"},"detail":{"type":"string","description":"Specifies the detail level of the image. \n- \"low\" uses fewer tokens and is faster\n- \"high\" is more detailed but uses more tokens\n- \"auto\" lets the model choose\n","enum":["low","high","auto"],"default":"auto"}},"required":["url"]},"chatCompletionMessageToolCall":{"type":"object","properties":{"id":{"type":"string","description":"The ID of the tool call."},"type":{"$ref":"#/components/schemas/toolCallType"},"function":{"type":"object","description":"The function that the model called.","properties":{"name":{"type":"string","description":"The name of the function to call."},"arguments":{"type":"string","description":"The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function."}},"required":["name","arguments"]}},"required":["id","type","function"]},"toolCallType":{"type":"string","enum":["function"],"description":"The type of the tool call, in this case `function`."},"ResponseFormatJSONSchema":{"type":"object","description":"Enable Structured Outputs; model must produce JSON adhering to supplied schema.","properties":{"type":{"type":"string","enum":["json_schema"]},"json_schema":{"type":"object","properties":{"name":{"type":"string","description":"Identifier for the schema definition."},"schema":{"type":"object","description":"Root JSON Schema object (must be object with additionalProperties=false)."},"strict":{"type":"boolean","description":"If true, instruct model to strictly follow schema.","default":true}},"required":["name","schema"],"additionalProperties":false}},"required":["type","json_schema"],"additionalProperties":false},"ResponseFormatJSONObject":{"type":"object","description":"Enable JSON mode; model outputs a single valid JSON object (no schema enforcement).","properties":{"type":{"type":"string","enum":["json_object"]}},"required":["type"],"additionalProperties":false},"ResponseFormatText":{"type":"object","description":"Request normal free-form text output (default behavior).","properties":{"type":{"type":"string","enum":["text"]}},"required":["type"],"additionalProperties":false},"chatCompletionTool":{"type":"object","properties":{"type":{"type":"string","enum":["function"],"description":"The type of the tool. Currently, only `function` is supported."},"function":{"$ref":"#/components/schemas/FunctionObject"}},"required":["type","function"]},"FunctionObject":{"type":"object","properties":{"description":{"type":"string","description":"A description of what the function does, used by the model to choose when and how to call the function."},"name":{"type":"string","description":"The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."},"parameters":{"$ref":"#/components/schemas/FunctionParameters"}},"required":["name"]},"FunctionParameters":{"type":"object","description":"The parameters the functions accepts, described as a JSON Schema object.","additionalProperties":true},"chatCompletionToolChoiceOption":{"description":"Controls which (if any) tool is called by the model.\n`none` means the model will not call any tool and instead generates a message.\n`auto` means the model can pick between generating a message or calling one or more tools.\n`required` means the model must call one or more tools.\nSpecifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.\n`none` is the default when no tools are present. `auto` is the default if tools are present.\n","oneOf":[{"type":"string","description":"`none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools.\n","enum":["none","auto","required"]},{"$ref":"#/components/schemas/chatCompletionNamedToolChoice"}]},"chatCompletionNamedToolChoice":{"type":"object","description":"Specifies a tool the model should use. Use to force the model to call a specific function.","properties":{"type":{"type":"string","enum":["function"],"description":"The type of the tool. Currently, only `function` is supported."},"function":{"type":"object","properties":{"name":{"type":"string","description":"The name of the function to call."}},"required":["name"]}},"required":["type","function"]},"SuccessfulChatCompletionResponse":{"type":"object","properties":{"id":{"type":"string"},"choices":{"type":"array","items":{"type":"object","properties":{"finish_reason":{"type":"string"},"index":{"type":"integer"},"message":{"type":"object","properties":{"role":{"type":"string"},"content":{"type":"string"},"tool_calls":{"type":"array","items":{"$ref":"#/components/schemas/chatCompletionMessageToolCall"},"readOnly":true},"refusal":{"type":"string","description":"Present when the model refuses to answer for safety/policy reasons."}}}}}},"created":{"type":"integer"},"object":{"type":"string"},"model":{"type":"string"},"system_fingerprint":{"type":"string"},"usage":{"type":"object","properties":{"prompt_tokens":{"type":"integer"},"completion_tokens":{"type":"integer"},"total_tokens":{"type":"integer"}}}}},"ErrorResponse":{"type":"object","description":"OpenAI-compatible error response. Returned for error responses that define this schema (for example, 429 Too Many Requests). Note: the backend currently returns an IONOS-native error body format; alignment to this schema is tracked in GPHML-2078.\n","properties":{"error":{"type":"object","properties":{"message":{"type":"string","description":"Human-readable error description."},"type":{"type":"string","description":"Error category (e.g. \"requests\", \"invalid_request_error\")."},"code":{"type":"string","nullable":true,"description":"Machine-readable error code (e.g. \"rate_limit_exceeded\"). May be null or omitted."},"param":{"type":"string","nullable":true,"description":"Parameter related to the error, if applicable. May be null or omitted."}},"required":["message","type"]}},"required":["error"]}},"headers":{"X-RateLimit-Limit":{"description":"Sustained request rate limit expressed as requests per minute. Enforcement is per second — see X-RateLimit-Burst for the short-window limit. The actual value depends on your contract; the default is 300 req/min (5 req/s).\n","schema":{"type":"integer"}},"X-RateLimit-Burst":{"description":"Maximum number of requests allowed within a 2-second burst window. Exceeding this triggers a 429 response. The actual value depends on your contract; the default is 10 requests per burst window.\n","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Requests remaining in the current 2-second burst window. Resets when the burst window expires. Does not reflect the per-minute X-RateLimit-Limit.\n","schema":{"type":"integer"}}},"responses":{"TooManyRequests":{"description":"Rate limit exceeded. Retry after the number of seconds indicated by the Retry-After header. Limits are contract-specific; check X-RateLimit-Limit and X-RateLimit-Burst in the response headers for the values that apply to your contract.\n","headers":{"Retry-After":{"description":"Number of seconds to wait before retrying.","schema":{"type":"integer"}},"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}},"paths":{"/v1/chat/completions":{"post":{"operationId":"openaiCompatChatCompletionsPost","summary":"Create Chat Completions","description":"Create Chat Completions by calling an available model in a format that is compatible with the OpenAI API.\nSupports both text-only and multimodal (text + images) inputs for compatible models.\nRate limits apply per contract. Default limits apply unless a custom rate limit is configured for your contract.\nExceeding the limit returns HTTP 429 with a Retry-After header.\n","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"}}}},"responses":{"200":{"description":"Successful operation","headers":{"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SuccessfulChatCompletionResponse"}}}},"400":{"description":"Bad request"},"429":{"$ref":"#/components/responses/TooManyRequests"},"500":{"description":"Server error"}},"tags":["OpenAI Compatible Endpoints"]}}}}
```

## Create Completions

> Create Completions by calling an available model in a format that is compatible with the OpenAI API<br>

```json
{"openapi":"3.0.3","info":{"title":"IONOS Cloud - OpenAI compatible AI Model Hub API","version":"1.0.0"},"tags":[{"name":"OpenAI Compatible Endpoints","description":"Endpoints compatible with OpenAI's API specification"}],"servers":[{"url":"https://openai.inference.de-txl.ionos.com","description":"Berlin"}],"security":[{"tokenAuth":[]}],"components":{"securitySchemes":{"tokenAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Please provide header value as 'Bearer <token>' and don't forget to add 'Bearer' HTTP Authorization Scheme before the token."}},"schemas":{"CompletionRequest":{"type":"object","properties":{"model":{"type":"string","description":"ID of the model to use"},"prompt":{"type":"string","description":"The prompt to generate completions from"},"temperature":{"type":"number","description":"The sampling temperature to be used"},"top_p":{"type":"number","description":"An alternative to sampling with temperature"},"n":{"type":"integer","description":"The number of chat completion choices to generate for each input message"},"stream":{"type":"boolean","description":"If set to true, it sends partial message deltas"},"stop":{"type":"array","items":{"type":"string"},"description":"Up to 4 sequences where the API will stop generating further tokens"},"max_tokens":{"type":"integer","description":"The maximum number of tokens to generate in the chat completion"},"presence_penalty":{"type":"number","description":"It is used to penalize new tokens based on their existence in the text so far"},"frequency_penalty":{"type":"number","description":"It is used to penalize new tokens based on their frequency in the text so far"},"logit_bias":{"type":"object","description":"Used to modify the probability of specific tokens appearing in the completion"},"user":{"type":"string","description":"A unique identifier representing your end-user"}},"required":["model","prompt"]},"SuccessfulCompletionResponse":{"type":"object","properties":{"id":{"type":"string"},"choices":{"type":"array","items":{"type":"object","properties":{"finish_reason":{"type":"string"},"index":{"type":"integer"},"text":{"type":"string"}}}},"created":{"type":"integer"},"object":{"type":"string"},"model":{"type":"string"},"usage":{"type":"object","properties":{"prompt_tokens":{"type":"integer"},"completion_tokens":{"type":"integer"},"total_tokens":{"type":"integer"}}}}},"ErrorResponse":{"type":"object","description":"OpenAI-compatible error response. Returned for error responses that define this schema (for example, 429 Too Many Requests). Note: the backend currently returns an IONOS-native error body format; alignment to this schema is tracked in GPHML-2078.\n","properties":{"error":{"type":"object","properties":{"message":{"type":"string","description":"Human-readable error description."},"type":{"type":"string","description":"Error category (e.g. \"requests\", \"invalid_request_error\")."},"code":{"type":"string","nullable":true,"description":"Machine-readable error code (e.g. \"rate_limit_exceeded\"). May be null or omitted."},"param":{"type":"string","nullable":true,"description":"Parameter related to the error, if applicable. May be null or omitted."}},"required":["message","type"]}},"required":["error"]}},"headers":{"X-RateLimit-Limit":{"description":"Sustained request rate limit expressed as requests per minute. Enforcement is per second — see X-RateLimit-Burst for the short-window limit. The actual value depends on your contract; the default is 300 req/min (5 req/s).\n","schema":{"type":"integer"}},"X-RateLimit-Burst":{"description":"Maximum number of requests allowed within a 2-second burst window. Exceeding this triggers a 429 response. The actual value depends on your contract; the default is 10 requests per burst window.\n","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Requests remaining in the current 2-second burst window. Resets when the burst window expires. Does not reflect the per-minute X-RateLimit-Limit.\n","schema":{"type":"integer"}}},"responses":{"TooManyRequests":{"description":"Rate limit exceeded. Retry after the number of seconds indicated by the Retry-After header. Limits are contract-specific; check X-RateLimit-Limit and X-RateLimit-Burst in the response headers for the values that apply to your contract.\n","headers":{"Retry-After":{"description":"Number of seconds to wait before retrying.","schema":{"type":"integer"}},"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}},"paths":{"/v1/completions":{"post":{"operationId":"openaiCompatCompletionsPost","summary":"Create Completions","description":"Create Completions by calling an available model in a format that is compatible with the OpenAI API\n","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompletionRequest"}}}},"responses":{"200":{"description":"Successful operation","headers":{"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SuccessfulCompletionResponse"}}}},"400":{"description":"Bad request"},"429":{"$ref":"#/components/responses/TooManyRequests"},"500":{"description":"Server error"}},"tags":["OpenAI Compatible Endpoints"]}}}}
```

## Get the entire list of available models

> Get the entire list of available models in a format that is compatible with the OpenAI API<br>

```json
{"openapi":"3.0.3","info":{"title":"IONOS Cloud - OpenAI compatible AI Model Hub API","version":"1.0.0"},"tags":[{"name":"OpenAI Compatible Endpoints","description":"Endpoints compatible with OpenAI's API specification"}],"servers":[{"url":"https://openai.inference.de-txl.ionos.com","description":"Berlin"}],"security":[{"tokenAuth":[]}],"components":{"securitySchemes":{"tokenAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Please provide header value as 'Bearer <token>' and don't forget to add 'Bearer' HTTP Authorization Scheme before the token."}},"headers":{"X-RateLimit-Limit":{"description":"Sustained request rate limit expressed as requests per minute. Enforcement is per second — see X-RateLimit-Burst for the short-window limit. The actual value depends on your contract; the default is 300 req/min (5 req/s).\n","schema":{"type":"integer"}},"X-RateLimit-Burst":{"description":"Maximum number of requests allowed within a 2-second burst window. Exceeding this triggers a 429 response. The actual value depends on your contract; the default is 10 requests per burst window.\n","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Requests remaining in the current 2-second burst window. Resets when the burst window expires. Does not reflect the per-minute X-RateLimit-Limit.\n","schema":{"type":"integer"}}},"responses":{"TooManyRequests":{"description":"Rate limit exceeded. Retry after the number of seconds indicated by the Retry-After header. Limits are contract-specific; check X-RateLimit-Limit and X-RateLimit-Burst in the response headers for the values that apply to your contract.\n","headers":{"Retry-After":{"description":"Number of seconds to wait before retrying.","schema":{"type":"integer"}},"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}},"schemas":{"ErrorResponse":{"type":"object","description":"OpenAI-compatible error response. Returned for error responses that define this schema (for example, 429 Too Many Requests). Note: the backend currently returns an IONOS-native error body format; alignment to this schema is tracked in GPHML-2078.\n","properties":{"error":{"type":"object","properties":{"message":{"type":"string","description":"Human-readable error description."},"type":{"type":"string","description":"Error category (e.g. \"requests\", \"invalid_request_error\")."},"code":{"type":"string","nullable":true,"description":"Machine-readable error code (e.g. \"rate_limit_exceeded\"). May be null or omitted."},"param":{"type":"string","nullable":true,"description":"Parameter related to the error, if applicable. May be null or omitted."}},"required":["message","type"]}},"required":["error"]}}},"paths":{"/v1/models":{"get":{"operationId":"openaiCompatModelsGet","summary":"Get the entire list of available models","description":"Get the entire list of available models in a format that is compatible with the OpenAI API\n","responses":{"200":{"description":"Successful operation","headers":{"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"type":"object","properties":{"object":{"type":"string","description":"Type of object returned, always \"list\"."},"data":{"type":"array","description":"List of available models.","items":{"type":"object","properties":{"id":{"type":"string","description":"Unique identifier for the model."},"object":{"type":"string","description":"Type of object returned, always \"model\"."},"created":{"type":"integer","format":"int64","description":"Unix timestamp (in seconds) when the model was created."},"owned_by":{"type":"string","description":"Owner of the model."}},"required":["id","object","created","owned_by"]}}},"required":["object","data"]}}}},"429":{"$ref":"#/components/responses/TooManyRequests"}},"tags":["OpenAI Compatible Endpoints"]}}}}
```

## Generate an image using a model

> Generate an image using a model in a format that is compatible with the OpenAI API<br>

```json
{"openapi":"3.0.3","info":{"title":"IONOS Cloud - OpenAI compatible AI Model Hub API","version":"1.0.0"},"tags":[{"name":"OpenAI Compatible Endpoints","description":"Endpoints compatible with OpenAI's API specification"}],"servers":[{"url":"https://openai.inference.de-txl.ionos.com","description":"Berlin"}],"security":[{"tokenAuth":[]}],"components":{"securitySchemes":{"tokenAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Please provide header value as 'Bearer <token>' and don't forget to add 'Bearer' HTTP Authorization Scheme before the token."}},"schemas":{"ImageGenerationRequest":{"type":"object","properties":{"model":{"type":"string","description":"ID of the model to use. Please check /v1/models for available models"},"prompt":{"type":"string","description":"The prompt to generate images from"},"n":{"type":"integer","default":1,"description":"The number of images to generate. Defaults to 1."},"size":{"type":"string","default":"1024*1024","description":"The size of the image to generate.\nDefaults to `\"1024*1024\"`.\nMust be one of `\"1024*1024\"`, `\"1792*1024\"`, or `\"1024*1792\"`.\nThe maximum supported resolution is `\"1792*1024\"`\n"},"response_format":{"type":"string","description":"The format of the response.","default":"b64_json","enum":["b64_json"]},"user":{"type":"string","description":"A unique identifier representing your end-user"}},"required":["model","prompt"]},"ImageGenerationResponse":{"type":"object","properties":{"created":{"type":"integer","description":"The Unix timestamp of the image generation"},"data":{"type":"array","description":"The list of generated images","items":{"type":"object","properties":{"url":{"type":"string","description":"The URL of the generated image.\nAs of right now, this format is not supported\n","default":null},"b64_json":{"type":"string","description":"The base64 encoded image in JSON format.\n"},"revised_prompt":{"type":"string","description":"Refined initial text prompt for improved image generation using advanced language model capabilities.\nAs of right now, this format is not supported\n"}}}}}},"ErrorResponse":{"type":"object","description":"OpenAI-compatible error response. Returned for error responses that define this schema (for example, 429 Too Many Requests). Note: the backend currently returns an IONOS-native error body format; alignment to this schema is tracked in GPHML-2078.\n","properties":{"error":{"type":"object","properties":{"message":{"type":"string","description":"Human-readable error description."},"type":{"type":"string","description":"Error category (e.g. \"requests\", \"invalid_request_error\")."},"code":{"type":"string","nullable":true,"description":"Machine-readable error code (e.g. \"rate_limit_exceeded\"). May be null or omitted."},"param":{"type":"string","nullable":true,"description":"Parameter related to the error, if applicable. May be null or omitted."}},"required":["message","type"]}},"required":["error"]}},"headers":{"X-RateLimit-Limit":{"description":"Sustained request rate limit expressed as requests per minute. Enforcement is per second — see X-RateLimit-Burst for the short-window limit. The actual value depends on your contract; the default is 300 req/min (5 req/s).\n","schema":{"type":"integer"}},"X-RateLimit-Burst":{"description":"Maximum number of requests allowed within a 2-second burst window. Exceeding this triggers a 429 response. The actual value depends on your contract; the default is 10 requests per burst window.\n","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Requests remaining in the current 2-second burst window. Resets when the burst window expires. Does not reflect the per-minute X-RateLimit-Limit.\n","schema":{"type":"integer"}}},"responses":{"TooManyRequests":{"description":"Rate limit exceeded. Retry after the number of seconds indicated by the Retry-After header. Limits are contract-specific; check X-RateLimit-Limit and X-RateLimit-Burst in the response headers for the values that apply to your contract.\n","headers":{"Retry-After":{"description":"Number of seconds to wait before retrying.","schema":{"type":"integer"}},"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}},"paths":{"/v1/images/generations":{"post":{"operationId":"openaiCompatImagesGenerationsPost","summary":"Generate an image using a model","description":"Generate an image using a model in a format that is compatible with the OpenAI API\n","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageGenerationRequest"}}}},"responses":{"200":{"description":"Successful operation","headers":{"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageGenerationResponse"}}}},"400":{"description":"Bad request"},"429":{"$ref":"#/components/responses/TooManyRequests"},"500":{"description":"Server error"}},"tags":["OpenAI Compatible Endpoints"]}}}}
```

## Creates an embedding vector.

> Creates an embedding vector representing the input text.<br>

```json
{"openapi":"3.0.3","info":{"title":"IONOS Cloud - OpenAI compatible AI Model Hub API","version":"1.0.0"},"tags":[{"name":"OpenAI Compatible Endpoints","description":"Endpoints compatible with OpenAI's API specification"}],"servers":[{"url":"https://openai.inference.de-txl.ionos.com","description":"Berlin"}],"security":[{"tokenAuth":[]}],"components":{"securitySchemes":{"tokenAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Please provide header value as 'Bearer <token>' and don't forget to add 'Bearer' HTTP Authorization Scheme before the token."}},"schemas":{"EmbeddingRequest":{"type":"object","properties":{"model":{"type":"string","description":"ID of the model to use. Please check /v1/models for available models"},"input":{"oneOf":[{"type":"string","description":"The input text to create an embedding for (single string)"},{"type":"array","description":"The input text to create embeddings for (list of strings)","items":{"type":"string"}}]}}},"EmbeddingResponse":{"type":"object","properties":{"model":{"type":"string","description":"ID of the model used"},"object":{"type":"string","description":"The object used to generate the embeddings"},"data":{"type":"array","description":"The list of generated embeddings","items":{"$ref":"#/components/schemas/Embedding"}},"usage":{"$ref":"#/components/schemas/EmbeddingUsage"}}},"Embedding":{"type":"object","properties":{"index":{"type":"integer","description":"The index of the input text"},"object":{"type":"string","description":"The object used to generate the embeddings"},"embedding":{"type":"array","description":"The embedding vector","items":{"type":"number","format":"float"}}}},"EmbeddingUsage":{"type":"object","properties":{"prompt_tokens":{"type":"integer","description":"The number of tokens in the input text"},"total_tokens":{"type":"integer","description":"The total number of tokens used"}}},"ErrorResponse":{"type":"object","description":"OpenAI-compatible error response. Returned for error responses that define this schema (for example, 429 Too Many Requests). Note: the backend currently returns an IONOS-native error body format; alignment to this schema is tracked in GPHML-2078.\n","properties":{"error":{"type":"object","properties":{"message":{"type":"string","description":"Human-readable error description."},"type":{"type":"string","description":"Error category (e.g. \"requests\", \"invalid_request_error\")."},"code":{"type":"string","nullable":true,"description":"Machine-readable error code (e.g. \"rate_limit_exceeded\"). May be null or omitted."},"param":{"type":"string","nullable":true,"description":"Parameter related to the error, if applicable. May be null or omitted."}},"required":["message","type"]}},"required":["error"]}},"headers":{"X-RateLimit-Limit":{"description":"Sustained request rate limit expressed as requests per minute. Enforcement is per second — see X-RateLimit-Burst for the short-window limit. The actual value depends on your contract; the default is 300 req/min (5 req/s).\n","schema":{"type":"integer"}},"X-RateLimit-Burst":{"description":"Maximum number of requests allowed within a 2-second burst window. Exceeding this triggers a 429 response. The actual value depends on your contract; the default is 10 requests per burst window.\n","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Requests remaining in the current 2-second burst window. Resets when the burst window expires. Does not reflect the per-minute X-RateLimit-Limit.\n","schema":{"type":"integer"}}},"responses":{"TooManyRequests":{"description":"Rate limit exceeded. Retry after the number of seconds indicated by the Retry-After header. Limits are contract-specific; check X-RateLimit-Limit and X-RateLimit-Burst in the response headers for the values that apply to your contract.\n","headers":{"Retry-After":{"description":"Number of seconds to wait before retrying.","schema":{"type":"integer"}},"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}},"paths":{"/v1/embeddings":{"post":{"operationId":"openaiCompatEmbeddingsPost","summary":"Creates an embedding vector.","description":"Creates an embedding vector representing the input text.\n","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EmbeddingRequest"}}}},"responses":{"200":{"description":"Successful operation","headers":{"X-RateLimit-Limit":{"$ref":"#/components/headers/X-RateLimit-Limit"},"X-RateLimit-Burst":{"$ref":"#/components/headers/X-RateLimit-Burst"},"X-RateLimit-Remaining":{"$ref":"#/components/headers/X-RateLimit-Remaining"}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EmbeddingResponse"}}}},"400":{"description":"Bad request"},"429":{"$ref":"#/components/responses/TooManyRequests"},"500":{"description":"Server error"}},"tags":["OpenAI Compatible Endpoints"]}}}}
```
