Implement chat completion streaming (#101)
* Implement chat completion streaming * Optimize the implementation of chat completion stream * Fix linter error
This commit is contained in:
110
chat_stream.go
Normal file
110
chat_stream.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package gogpt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type ChatCompletionStreamChoiceDelta struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type ChatCompletionStreamChoice struct {
|
||||
Index int `json:"index"`
|
||||
Delta ChatCompletionStreamChoiceDelta `json:"delta"`
|
||||
FinishReason string `json:"finish_reason"`
|
||||
}
|
||||
|
||||
type ChatCompletionStreamResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Created int64 `json:"created"`
|
||||
Model string `json:"model"`
|
||||
Choices []ChatCompletionStreamChoice `json:"choices"`
|
||||
}
|
||||
|
||||
// ChatCompletionStream
|
||||
// Note: Perhaps it is more elegant to abstract Stream using generics.
|
||||
type ChatCompletionStream struct {
|
||||
emptyMessagesLimit uint
|
||||
isFinished bool
|
||||
|
||||
reader *bufio.Reader
|
||||
response *http.Response
|
||||
}
|
||||
|
||||
func (stream *ChatCompletionStream) Recv() (response ChatCompletionStreamResponse, err error) {
|
||||
if stream.isFinished {
|
||||
err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
var emptyMessagesCount uint
|
||||
|
||||
waitForData:
|
||||
line, err := stream.reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var headerData = []byte("data: ")
|
||||
line = bytes.TrimSpace(line)
|
||||
if !bytes.HasPrefix(line, headerData) {
|
||||
emptyMessagesCount++
|
||||
if emptyMessagesCount > stream.emptyMessagesLimit {
|
||||
err = ErrTooManyEmptyStreamMessages
|
||||
return
|
||||
}
|
||||
|
||||
goto waitForData
|
||||
}
|
||||
|
||||
line = bytes.TrimPrefix(line, headerData)
|
||||
if string(line) == "[DONE]" {
|
||||
stream.isFinished = true
|
||||
err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
err = json.Unmarshal(line, &response)
|
||||
return
|
||||
}
|
||||
|
||||
func (stream *ChatCompletionStream) Close() {
|
||||
stream.response.Body.Close()
|
||||
}
|
||||
|
||||
func (stream *ChatCompletionStream) GetResponse() *http.Response {
|
||||
return stream.response
|
||||
}
|
||||
|
||||
// CreateChatCompletionStream — API call to create a chat completion w/ streaming
|
||||
// support. It sets whether to stream back partial progress. If set, tokens will be
|
||||
// sent as data-only server-sent events as they become available, with the
|
||||
// stream terminated by a data: [DONE] message.
|
||||
func (c *Client) CreateChatCompletionStream(
|
||||
ctx context.Context,
|
||||
request ChatCompletionRequest,
|
||||
) (stream *ChatCompletionStream, err error) {
|
||||
request.Stream = true
|
||||
req, err := c.newStreamRequest(ctx, "POST", "/chat/completions", request)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := c.config.HTTPClient.Do(req) //nolint:bodyclose // body is closed in stream.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
stream = &ChatCompletionStream{
|
||||
emptyMessagesLimit: c.config.EmptyMessagesLimit,
|
||||
reader: bufio.NewReader(resp.Body),
|
||||
response: resp,
|
||||
}
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user