add max_completions_tokens for o1 series models (#857)

* add max_completions_tokens for o1 series models * add validation for o1 series models + beta limitations
2024-09-20 23:40:24 +03:00
parent 1ec8c24ea7
commit 9add1c3486
5 changed files with 341 additions and 12 deletions
--- a/chat.go
+++ b/chat.go
@@ -200,18 +200,25 @@ type ChatCompletionResponseFormatJSONSchema struct {

 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
-	Model            string                        `json:"model"`
-	Messages         []ChatCompletionMessage       `json:"messages"`
-	MaxTokens        int                           `json:"max_tokens,omitempty"`
-	Temperature      float32                       `json:"temperature,omitempty"`
-	TopP             float32                       `json:"top_p,omitempty"`
-	N                int                           `json:"n,omitempty"`
-	Stream           bool                          `json:"stream,omitempty"`
-	Stop             []string                      `json:"stop,omitempty"`
-	PresencePenalty  float32                       `json:"presence_penalty,omitempty"`
-	ResponseFormat   *ChatCompletionResponseFormat `json:"response_format,omitempty"`
-	Seed             *int                          `json:"seed,omitempty"`
-	FrequencyPenalty float32                       `json:"frequency_penalty,omitempty"`
+	Model    string                  `json:"model"`
+	Messages []ChatCompletionMessage `json:"messages"`
+	// MaxTokens is the maximum number of tokens that can be generated in the chat completion.
+	// This value can be used to control costs for text generated via the API.
+	// It is now deprecated in favor of max_completion_tokens and is not compatible with o1 series models.
+	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
+	MaxTokens int `json:"max_tokens,omitempty"`
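+	// MaxCompletionsTokens is an upper bound for the number of tokens that can be generated for a completion,
+	// including visible output tokens and reasoning tokens. refs: https://platform.openai.com/docs/guides/reasoning
+	// NOTE(review): the OpenAI API reference names this parameter max_completion_tokens (singular); verify the JSON tag.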
+	MaxCompletionsTokens int                           `json:"max_completions_tokens,omitempty"`
+	Temperature          float32                       `json:"temperature,omitempty"`
+	TopP                 float32                       `json:"top_p,omitempty"`
+	N                    int                           `json:"n,omitempty"`
+	Stream               bool                          `json:"stream,omitempty"`
+	Stop                 []string                      `json:"stop,omitempty"`
+	PresencePenalty      float32                       `json:"presence_penalty,omitempty"`
+	ResponseFormat       *ChatCompletionResponseFormat `json:"response_format,omitempty"`
+	Seed                 *int                          `json:"seed,omitempty"`
+	FrequencyPenalty     float32                       `json:"frequency_penalty,omitempty"`
 	// LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
 	// incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
@@ -364,6 +371,10 @@ func (c *Client) CreateChatCompletion(
 		return
 	}

+	if err = validateRequestForO1Models(request); err != nil {
+		return
+	}
+
 	req, err := c.newRequest(
 		ctx,
 		http.MethodPost,