feat: initial TTS support (#528)

* feat: initial TTS support * chore: lint, omitempty * chore: dont use pointer in struct * fix: add mocked server tests to speech_test.go Co-authored-by: Lachlan Laycock <supagroova@gmail.com> * chore: update imports * chore: fix lint * chore: add an error check * chore: ignore lint * chore: add error checks in package * chore: add test * chore: fix test --------- Co-authored-by: Lachlan Laycock <supagroova@gmail.com>
2023-11-13 09:35:34 -05:00
parent b7cac703ac
commit 515de0219d
3 changed files with 205 additions and 0 deletions
--- a/speech_test.go
+++ b/speech_test.go
@@ -0,0 +1,115 @@
+package openai_test
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"mime"
+	"net/http"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/sashabaranov/go-openai"
+	"github.com/sashabaranov/go-openai/internal/test"
+	"github.com/sashabaranov/go-openai/internal/test/checks"
+)
+
+func TestSpeechIntegration(t *testing.T) {
+	client, server, teardown := setupOpenAITestServer()
+	defer teardown()
+
+	server.RegisterHandler("/v1/audio/speech", func(w http.ResponseWriter, r *http.Request) {
+		dir, cleanup := test.CreateTestDirectory(t)
+		path := filepath.Join(dir, "fake.mp3")
+		test.CreateTestFile(t, path)
+		defer cleanup()
+
+		// audio endpoints only accept POST requests
+		if r.Method != "POST" {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+
+		mediaType, _, err := mime.ParseMediaType(r.Header.Get("Content-Type"))
+		if err != nil {
+			http.Error(w, "failed to parse media type", http.StatusBadRequest)
+			return
+		}
+
+		if mediaType != "application/json" {
+			http.Error(w, "request is not json", http.StatusBadRequest)
+			return
+		}
+
+		// Parse the JSON body of the request
+		var params map[string]interface{}
+		err = json.NewDecoder(r.Body).Decode(&params)
+		if err != nil {
+			http.Error(w, "failed to parse request body", http.StatusBadRequest)
+			return
+		}
+
+		// Check if each required field is present in the parsed JSON object
+		reqParams := []string{"model", "input", "voice"}
+		for _, param := range reqParams {
+			_, ok := params[param]
+			if !ok {
+				http.Error(w, fmt.Sprintf("no %s in params", param), http.StatusBadRequest)
+				return
+			}
+		}
+
+		// read audio file content
+		audioFile, err := os.ReadFile(path)
+		if err != nil {
+			http.Error(w, "failed to read audio file", http.StatusInternalServerError)
+			return
+		}
+
+		// write audio file content to response
+		w.Header().Set("Content-Type", "audio/mpeg")
+		w.Header().Set("Transfer-Encoding", "chunked")
+		w.Header().Set("Connection", "keep-alive")
+		_, err = w.Write(audioFile)
+		if err != nil {
+			http.Error(w, "failed to write body", http.StatusInternalServerError)
+			return
+		}
+	})
+
+	t.Run("happy path", func(t *testing.T) {
+		res, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
+			Model: openai.TTSModel1,
+			Input: "Hello!",
+			Voice: openai.VoiceAlloy,
+		})
+		checks.NoError(t, err, "CreateSpeech error")
+		defer res.Close()
+
+		buf, err := io.ReadAll(res)
+		checks.NoError(t, err, "ReadAll error")
+
+		// save buf to file as mp3
+		err = os.WriteFile("test.mp3", buf, 0644)
+		checks.NoError(t, err, "Create error")
+	})
+	t.Run("invalid model", func(t *testing.T) {
+		_, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
+			Model: "invalid_model",
+			Input: "Hello!",
+			Voice: openai.VoiceAlloy,
+		})
+		checks.ErrorIs(t, err, openai.ErrInvalidSpeechModel, "CreateSpeech error")
+	})
+
+	t.Run("invalid voice", func(t *testing.T) {
+		_, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
+			Model: openai.TTSModel1,
+			Input: "Hello!",
+			Voice: "invalid_voice",
+		})
+		checks.ErrorIs(t, err, openai.ErrInvalidVoice, "CreateSpeech error")
+	})
+}