[New_Features] Adds recently added Assistant cost saving parameters (#710)

* add cost saving parameters * add periods at the end of comments * shorten commnet * further lower comment length * fix type
2024-04-17 03:26:14 +08:00
parent 2446f08f94
commit a42f51967f
1 changed files with 44 additions and 0 deletions
--- a/run.go
+++ b/run.go
@@ -28,6 +28,16 @@ type Run struct {
 	Metadata       map[string]any     `json:"metadata"`
 	Usage          Usage              `json:"usage,omitempty"`

+	Temperature *float32 `json:"temperature,omitempty"`
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+	// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
+
 	httpHeader
 }

@@ -78,8 +88,42 @@ type RunRequest struct {
 	AdditionalInstructions string         `json:"additional_instructions,omitempty"`
 	Tools                  []Tool         `json:"tools,omitempty"`
 	Metadata               map[string]any `json:"metadata,omitempty"`
+
+	// Sampling temperature between 0 and 2. Higher values like 0.8 are  more random.
+	// lower values are more focused and deterministic.
+	Temperature *float32 `json:"temperature,omitempty"`
+
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+
+	// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 }

+// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy.
+type ThreadTruncationStrategy struct {
+	// default 'auto'.
+	Type TruncationStrategy `json:"type,omitempty"`
+	// this field should be set if the truncation strategy is set to LastMessages.
+	LastMessages *int `json:"last_messages,omitempty"`
+}
+
+// TruncationStrategy defines the existing truncation strategies existing for thread management in an assistant.
+type TruncationStrategy string
+
+const (
+	// TruncationStrategyAuto messages in the middle of the thread will be dropped to fit the context length of the model.
+	TruncationStrategyAuto = TruncationStrategy("auto")
+	// TruncationStrategyLastMessages the thread will be truncated to the n most recent messages in the thread.
+	TruncationStrategyLastMessages = TruncationStrategy("last_messages")
+)
+
 type RunModifyRequest struct {
 	Metadata map[string]any `json:"metadata,omitempty"`
 }