ai_sandbox/speech-speech/backend/api.py

62 lines
1.5 KiB
Python
Raw Normal View History

2024-02-20 11:31:01 -05:00
from openai import OpenAI
from fastapi import FastAPI, File, Response, Request
2024-02-24 17:58:26 -05:00
from fastapi.middleware.cors import CORSMiddleware
2024-02-20 19:35:36 -05:00
from fastapi.responses import FileResponse
2024-02-20 11:31:01 -05:00
from pydantic import BaseModel
2024-02-26 11:52:13 -05:00
from io import BytesIO
2024-02-20 11:31:01 -05:00
# FastAPI application object; the route decorators in this file register on it.
app = FastAPI()
# Single OpenAI client shared by every handler. It is constructed with no
# explicit arguments (presumably picking up credentials from the environment
# -- confirm against deployment config).
openAI_clinet = OpenAI()
2024-02-24 17:58:26 -05:00
# CORS is left fully open: every origin, method and header is allowed.
# NOTE(review): fine for a local sandbox; narrow allow_origins before this
# service is exposed beyond a development machine.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
2024-02-20 11:31:01 -05:00
class ConversationMessege(BaseModel):
    """One message of a chat conversation: who spoke and what was said."""

    # Speaker of the message (presumably "system", "user" or "assistant",
    # matching the chat-completion API -- confirm against the frontend).
    role: str
    # Text of the message.
    content: str
class Conversation(BaseModel):
    """An ordered list of chat messages.

    NOTE(review): the /conversation handler visible in this file reads the raw
    request JSON instead of this model -- confirm whether it is still needed.
    """

    # Messages of the conversation, in order.
    messages: list[ConversationMessege]
@app.post("/get-text")
async def stt(audio: bytes = File()):
    """Transcribe an uploaded audio clip to text with the ``whisper-1`` model.

    Args:
        audio: Raw bytes of the uploaded audio file (form field ``audio``).

    Returns:
        A dict with two keys: ``"len"``, the size of the upload in bytes, and
        ``"user-transcript"``, the value returned by the transcription call
        (requested with ``response_format="text"``).
    """
    # Local import so this handler does not rely on a file-level import.
    import asyncio

    with BytesIO(audio) as f:
        # The in-memory buffer has no filename of its own, so one is attached.
        # NOTE(review): the ".mp3" name is hard-coded regardless of what was
        # actually uploaded; presumably the API uses it to infer the audio
        # format -- confirm the frontend always records MP3-compatible audio.
        f.name = "audio.mp3"
        # The OpenAI client used here is synchronous. Running it in a worker
        # thread keeps this coroutine from blocking the event loop (and every
        # other request) for the duration of the network round-trip. The
        # await stays inside the ``with`` so the buffer is still open while
        # the worker thread reads it.
        transcript = await asyncio.to_thread(
            openAI_clinet.audio.transcriptions.create,
            model="whisper-1",
            file=f,
            response_format="text",
        )
    return {"len": len(audio), "user-transcript": transcript}
@app.post("/conversation")
async def get_next_response(request: Request):
    """Ask ``gpt-3.5-turbo`` for the next reply in a conversation.

    Args:
        request: Incoming request. Its JSON body is forwarded unchanged as the
            ``messages`` argument of the chat-completion call.
            NOTE(review): presumably a list of ``{"role", "content"}``
            objects -- confirm against the frontend; it is not validated here.

    Returns:
        A dict with the ``"role"`` and ``"content"`` of the first choice in
        the completion response.
    """
    # Local import so this handler does not rely on a file-level import.
    import asyncio

    messages = await request.json()
    # The OpenAI client used here is synchronous. Running it in a worker
    # thread keeps the event loop free to serve other requests while the
    # completion is being generated.
    res = await asyncio.to_thread(
        openAI_clinet.chat.completions.create,
        model="gpt-3.5-turbo",
        messages=messages,
    )
    reply = res.choices[0].message
    # Debug output: echo the submitted conversation and the model's answer.
    print(messages)
    print(reply.content)
    return {"role": reply.role, "content": reply.content}
2024-02-20 19:35:36 -05:00
2024-02-25 17:09:35 -05:00
@app.get("/speak")
def tts(text: str):
    """Synthesize speech for ``text`` and return the audio in the response body.

    Args:
        text: The text to speak (query parameter ``text``).

    Returns:
        A ``Response`` whose body is the MP3 audio produced by the ``tts-1``
        model with the ``nova`` voice.
    """
    speech = openAI_clinet.audio.speech.create(
        input=text,
        model="tts-1",
        voice="nova",
        response_format="mp3",
    )
    audio_bytes = speech.content
    # NOTE(review): the registered MIME type for MP3 is "audio/mpeg"; the
    # value below is kept as-is because clients may depend on it.
    return Response(content=audio_bytes, media_type="audio/mp3")