From f0770cfe1d5094d5d40a878658abf535bbdcec4c Mon Sep 17 00:00:00 2001
From: romazu
Date: Fri, 16 Jun 2023 17:13:26 +0400
Subject: [PATCH] audio: add items to AudioResponseFormat enum (#382)

* audio: add items to AudioResponseFormat enum

* audio: expand AudioResponse struct to accommodate verbose json response

---------

Co-authored-by: Roman Zubov
---
 audio.go | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/audio.go b/audio.go
index 20e865f..adfc527 100644
--- a/audio.go
+++ b/audio.go
@@ -20,9 +20,11 @@ const (
 type AudioResponseFormat string
 
 const (
-	AudioResponseFormatJSON AudioResponseFormat = "json"
-	AudioResponseFormatSRT  AudioResponseFormat = "srt"
-	AudioResponseFormatVTT  AudioResponseFormat = "vtt"
+	AudioResponseFormatJSON        AudioResponseFormat = "json"
+	AudioResponseFormatText        AudioResponseFormat = "text"
+	AudioResponseFormatSRT         AudioResponseFormat = "srt"
+	AudioResponseFormatVerboseJSON AudioResponseFormat = "verbose_json"
+	AudioResponseFormatVTT         AudioResponseFormat = "vtt"
 )
 
 // AudioRequest represents a request structure for audio API.
@@ -44,6 +46,22 @@ type AudioRequest struct {
 
 // AudioResponse represents a response structure for audio API.
 type AudioResponse struct {
+	Task     string  `json:"task"`
+	Language string  `json:"language"`
+	Duration float64 `json:"duration"`
+	Segments []struct {
+		ID               int     `json:"id"`
+		Seek             int     `json:"seek"`
+		Start            float64 `json:"start"`
+		End              float64 `json:"end"`
+		Text             string  `json:"text"`
+		Tokens           []int   `json:"tokens"`
+		Temperature      float64 `json:"temperature"`
+		AvgLogprob       float64 `json:"avg_logprob"`
+		CompressionRatio float64 `json:"compression_ratio"`
+		NoSpeechProb     float64 `json:"no_speech_prob"`
+		Transient        bool    `json:"transient"`
+	} `json:"segments"`
 	Text string `json:"text"`
 }
 
@@ -96,7 +114,7 @@ func (c *Client) callAudioAPI(
 
 // HasJSONResponse returns true if the response format is JSON.
 func (r AudioRequest) HasJSONResponse() bool {
-	return r.Format == "" || r.Format == AudioResponseFormatJSON
+	return r.Format == "" || r.Format == AudioResponseFormatJSON || r.Format == AudioResponseFormatVerboseJSON
 }
 
 // audioMultipartForm creates a form with audio file contents and the name of the model to use for