代码大模型接口文档

# 文本生成推理 API

# POST /

如果 `stream == false`,则生成标记;如果 `stream == true`,则生成标记流。

即:`stream == false` 时一次性返回生成结果,`stream == true` 时以流的形式返回标记。

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
     "inputs": "My name is Olivier and I",
     "parameters": {
       "best_of": 1,
       "decoder_input_details": false,
       "details": true,
       "do_sample": true,
       "frequency_penalty": 0.1,
       "grammar": null,
       "max_new_tokens": 20,
       "repetition_penalty": 1.03,
       "return_full_text": false,
       "seed": null,
       "stop": [
         "photographer"
       ],
       "temperature": 0.5,
       "top_k": 10,
       "top_n_tokens": 5,
       "top_p": 0.95,
       "truncate": null,
       "typical_p": 0.95,
       "watermark": true
     },
     "stream": false
   }

# 响应

代码 描述 链接
200

生成的文本

媒体类型

application/json、text/event-stream

该选项控制 `Accept` 请求头。

- 示例值|架构

{

  "details": {

        "best_of_sequences": [

            {

                "finish_reason": "Length",

                "generated_text": "test",

                "generated_tokens": 1,

                "prefill": [

                    {

                        "id": 0,

                        "logprob": -0.34,

                        "text": "test"

                    }

                ],

                "seed": 42,

                "tokens": [

                    {

                        "id": 0,

                        "logprob": -0.34,

                        "special": false,

                        "text": "test"

                    }

                ],

                "top_tokens": [

                    [

                        {

                            "id": 0,

                            "logprob": -0.34,

                            "special": false,

                            "text": "test"

                        }

                    ]

                ]

            }

        ],

        "finish_reason": "Length",

        "generated_tokens": 1,

        "prefill": [

            {

                "id": 0,

                "logprob": -0.34,

                "text": "test"

            }

        ],

        "seed": 42,

        "tokens": [

            {

                "id": 0,

                "logprob": -0.34,

                "special": false,

                "text": "test"

            }

        ],

        "top_tokens": [

            [

                {

                    "id": 0,

                    "logprob": -0.34,

                    "special": false,

                    "text": "test"

                }

            ]

        ]

  },

  "generated_text": "test"

}

没有链接
422

输入验证错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Input validation error"

}

没有链接
424

生成错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Request failed during generation"

}

没有链接
429

模型超载

媒体类型

application/json

- 示例值|架构

{

  "error": "Model is overloaded"

}

没有链接
500

不完整的生成

媒体类型

application/json

- 示例值|架构

{

  "error": "Incomplete generation"

}

没有链接

# POST /generate

生成token

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
  "inputs": "My name is Olivier and I",
  "parameters": {
    "best_of": 1,
    "decoder_input_details": false,
    "details": true,
    "do_sample": true,
    "frequency_penalty": 0.1,
    "grammar": null,
    "max_new_tokens": 20,
    "repetition_penalty": 1.03,
    "return_full_text": false,
    "seed": null,
    "stop": [
      "photographer"
    ],
    "temperature": 0.5,
    "top_k": 10,
    "top_n_tokens": 5,
    "top_p": 0.95,
    "truncate": null,
    "typical_p": 0.95,
    "watermark": true
  }
}

# 响应

代码 描述 链接
200

生成的文本

媒体类型

application/json

该选项控制 `Accept` 请求头。

- 示例值|架构

{

  "details": {

        "best_of_sequences": [

            {

                "finish_reason": "Length",

                "generated_text": "test",

                "generated_tokens": 1,

                "prefill": [

                    {

                        "id": 0,

                        "logprob": -0.34,

                        "text": "test"

                    }

                ],

                "seed": 42,

                "tokens": [

                    {

                        "id": 0,

                        "logprob": -0.34,

                        "special": false,

                        "text": "test"

                    }

                ],

                "top_tokens": [

                    [

                        {

                            "id": 0,

                            "logprob": -0.34,

                            "special": false,

                            "text": "test"

                        }

                    ]

                ]

            }

        ],

        "finish_reason": "Length",

        "generated_tokens": 1,

        "prefill": [

            {

                "id": 0,

                "logprob": -0.34,

                "text": "test"

            }

        ],

        "seed": 42,

        "tokens": [

            {

                "id": 0,

                "logprob": -0.34,

                "special": false,

                "text": "test"

            }

        ],

        "top_tokens": [

            [

                {

                    "id": 0,

                    "logprob": -0.34,

                    "special": false,

                    "text": "test"

                }

            ]

        ]

  },

  "generated_text": "test"

}

没有链接
422

输入验证错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Input validation error"

}

没有链接
424

生成错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Request failed during generation"

}

没有链接
429

模型超载

媒体类型

application/json

- 示例值|架构

{

  "error": "Model is overloaded"

}

没有链接
500

不完整的生成

媒体类型

application/json

- 示例值|架构

{

  "error": "Incomplete generation"

}

没有链接

# POST /generate_stream

使用服务器发送事件(Server-Sent Events,SSE)生成标记流

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
  "inputs": "My name is Olivier and I",
  "parameters": {
    "best_of": 1,
    "decoder_input_details": false,
    "details": true,
    "do_sample": true,
    "frequency_penalty": 0.1,
    "grammar": null,
    "max_new_tokens": 20,
    "repetition_penalty": 1.03,
    "return_full_text": false,
    "seed": null,
    "stop": [
      "photographer"
    ],
    "temperature": 0.5,
    "top_k": 10,
    "top_n_tokens": 5,
    "top_p": 0.95,
    "truncate": null,
    "typical_p": 0.95,
    "watermark": true
  }
}

# 响应

代码 描述 链接
200

生成的文本

媒体类型

text/event-stream

该选项控制 `Accept` 请求头。

- 示例值|架构

{

  "details": null,

  "generated_text": "test",

  "index": 0,

  "token": {

        "id": 0,

        "logprob": -0.34,

        "special": false,

        "text": "test"

  },

  "top_tokens": [

        {

            "id": 0,

            "logprob": -0.34,

            "special": false,

            "text": "test"

        }

  ]

}

没有链接
422

输入验证错误

媒体类型

text/event-stream

- 示例值|架构

{

  "error": "Input validation error"

}

没有链接
424

生成错误

媒体类型

text/event-stream

- 示例值|架构

{

  "error": "Request failed during generation"

}

没有链接
429

模型超载

媒体类型

text/event-stream

- 示例值|架构

{

  "error": "Model is overloaded"

}

没有链接
500

不完整的生成

媒体类型

text/event-stream

- 示例值|架构

{

  "error": "Incomplete generation"

}

没有链接

# GET /health

健康检查

# 参数

无参数

# 响应

代码 描述 链接
200 一切正常 没有链接
503

文本生成推理服务不可用(Text Generation Inference is down)

媒体类型

application/json

- 示例值|架构

{

  "error": "unhealthy",

  "error_type": "healthcheck"

}

没有链接

# GET /info

文本生成推理端点信息

# 参数

无参数

# 响应

代码 描述 链接
200

服务模型信息

媒体类型

application/json

该选项控制 `Accept` 请求头。

- 示例值|架构

{

  "docker_label": "null",

  "max_batch_size": null,

  "max_batch_total_tokens": 32000,

  "max_best_of": 2,

  "max_client_batch_size": 32,

  "max_concurrent_requests": 128,

  "max_input_length": 1024,

  "max_stop_sequences": 4,

  "max_total_tokens": 2048,

  "max_waiting_tokens": 20,

  "model_device_type": "cuda",

  "model_dtype": "torch.float16",

  "model_id": "bigscience/bloom-560m",

  "model_pipeline_tag": "text-generation",

  "model_sha": "e985a63cdc139290c5f700ff1929f0b5942cced2",

  "sha": "null",

  "validation_workers": 2,

  "version": "0.5.0",

  "waiting_served_ratio": 1.2

}

没有链接

# GET /metrics

Prometheus 指标抓取端点

# 参数

无参数

# 响应

代码 描述 链接
200

Prometheus 指标

媒体类型

text/plain

该选项控制 `Accept` 请求头。

- 示例值|架构

string

没有链接

# POST /tokenize

对输入进行分词(tokenize)

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
 "inputs": "My name is Olivier and I",
 "parameters": {
   "best_of": 1,
   "decoder_input_details": false,
   "details": true,
   "do_sample": true,
   "frequency_penalty": 0.1,
   "grammar": null,
   "max_new_tokens": 20,
   "repetition_penalty": 1.03,
   "return_full_text": false,
   "seed": null,
   "stop": [
     "photographer"
   ],
   "temperature": 0.5,
   "top_k": 10,
   "top_n_tokens": 5,
   "top_p": 0.95,
   "truncate": null,
   "typical_p": 0.95,
   "watermark": true
 }
}

# 响应

代码 描述 链接
200

分词后的标记 ID(Tokenized ids)

媒体类型

application/json

该选项控制 `Accept` 请求头。

- 示例值|架构

[

  {

        "id": 0,

        "start": 0,

        "stop": 2,

        "text": "test"

  }

]

没有链接
404

未找到分词器(tokenizer)

媒体类型

application/json

- 示例值|架构

{

  "error": "No fast tokenizer available"

}

没有链接

# POST /v1/chat/completions

生成token

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
  "frequency_penalty": 1,
  "logit_bias": [
    0
  ],
  "logprobs": false,
  "max_tokens": 32,
  "messages": [
    {
      "role": "user",
      "content": "What is Deep Learning?"
    }
  ],
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
  "n": 2,
  "presence_penalty": 0.1,
  "seed": 42,
  "stop": null,
  "stream": true,
  "temperature": 1,
  "tool_choice": {
    "FunctionName": "string"
  },
  "tool_prompt": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
  "tools": null,
  "top_logprobs": 5,
  "top_p": 0.95
}

# 响应

代码 描述 链接
200

生成的聊天完成

媒体类型

application/json、text/event-stream

该选项控制 `Accept` 请求头。

- 示例值|架构

{

  "choices": [

        {

            "finish_reason": "string",

            "index": 0,

            "logprobs": {

                "content": [

                    {

                        "logprob": 0,

                        "token": "string",

                        "top_logprobs": [

                            {

                                "logprob": 0,

                                "token": "string"

                            }

                        ]

                    }

                ]

            },

            "message": {

                "content": "My name is David and I",

                "name": "\"David\"",

                "role": "user",

                "tool_calls": [

                    {

                        "function": {

                            "arguments": "string",

                            "description": "string",

                            "name": "string"

                        },

                        "id": 0,

                        "type": "string"

                    }

                ]

            }

        }

  ],

  "created": 1706270835,

  "id": "string",

  "model": "mistralai/Mistral-7B-Instruct-v0.2",

  "object": "string",

  "system_fingerprint": "string",

  "usage": {

        "completion_tokens": 0,

        "prompt_tokens": 0,

        "total_tokens": 0

  }

}

没有链接
422

输入验证错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Input validation error"

}

没有链接
424

生成错误

媒体类型

application/json

- 示例值|架构

{

  "error": "Request failed during generation"

}

没有链接
429

模型超载

媒体类型

application/json

- 示例值|架构

{

  "error": "Model is overloaded"

}

没有链接
500

不完整的生成

媒体类型

application/json

- 示例值|架构

{

  "error": "Incomplete generation"

}

没有链接

# POST /v1/completions

生成token

# 参数

无参数

# 请求正文Request body

application/json

  • 示例值|架构
{
  "frequency_penalty": 1,
  "max_tokens": 32,
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
  "prompt": "What is Deep Learning?",
  "repetition_penalty": 0,
  "seed": 42,
  "stream": true,
  "suffix": "string",
  "temperature": 1,
  "top_p": 0.95,
  "stop": null
}

# 响应

代码 描述 链接
200

生成的聊天完成

媒体类型

application/json、text/event-stream

该选项控制 `Accept` 请求头。

- 示例值|架构

"string"

没有链接
422

输入验证错误

媒体类型

application/json

- 示例值|架构

{

    "error": "Input validation error"

}

没有链接
424

生成错误

媒体类型

application/json

- 示例值|架构

{

    "error": "Request failed during generation"

}

没有链接
429

模型超载

媒体类型

application/json

- 示例值|架构

{

    "error": "Model is overloaded"

}

没有链接
500

不完整的生成

媒体类型

application/json

- 示例值|架构

{

    "error": "Incomplete generation"

}

没有链接
Last Updated: 7/18/2024, 9:41:43 AM