[New_Features] Adds recently added Assistant cost saving parameters (#710)

* add cost saving parameters

* add periods at the end of comments

* shorten commnet

* further lower comment length

* fix type
This commit is contained in:
Quest Henkart
2024-04-17 03:26:14 +08:00
committed by GitHub
parent 2446f08f94
commit a42f51967f

44
run.go
View File

@@ -28,6 +28,16 @@ type Run struct {
Metadata map[string]any `json:"metadata"`
Usage Usage `json:"usage,omitempty"`
Temperature *float32 `json:"temperature,omitempty"`
// The maximum number of prompt tokens that may be used over the course of the run.
// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
// The maximum number of completion tokens that may be used over the course of the run.
// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
httpHeader
}
@@ -78,8 +88,42 @@ type RunRequest struct {
AdditionalInstructions string `json:"additional_instructions,omitempty"`
Tools []Tool `json:"tools,omitempty"`
Metadata map[string]any `json:"metadata,omitempty"`
// Sampling temperature between 0 and 2. Higher values like 0.8 are more random.
// lower values are more focused and deterministic.
Temperature *float32 `json:"temperature,omitempty"`
// The maximum number of prompt tokens that may be used over the course of the run.
// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
// The maximum number of completion tokens that may be used over the course of the run.
// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
}
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy.
type ThreadTruncationStrategy struct {
// default 'auto'.
Type TruncationStrategy `json:"type,omitempty"`
// this field should be set if the truncation strategy is set to LastMessages.
LastMessages *int `json:"last_messages,omitempty"`
}
// TruncationStrategy defines the existing truncation strategies existing for thread management in an assistant.
type TruncationStrategy string
const (
// TruncationStrategyAuto messages in the middle of the thread will be dropped to fit the context length of the model.
TruncationStrategyAuto = TruncationStrategy("auto")
// TruncationStrategyLastMessages the thread will be truncated to the n most recent messages in the thread.
TruncationStrategyLastMessages = TruncationStrategy("last_messages")
)
type RunModifyRequest struct {
Metadata map[string]any `json:"metadata,omitempty"`
}