cleanup

audio playback for both user and assistant
assistant response is replayable
2024-02-28 12:10:49 -05:00 · 2024-02-28 11:55:34 -05:00 · 2024-02-28 11:33:08 -05:00 · 2024-02-26 20:21:17 -05:00 · 2024-02-26 20:03:58 -05:00 · 2024-02-26 19:58:28 -05:00
9 changed files with 109 additions and 54 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,28 @@
 **/audio
 *.mp3
 *.webm
 .env
 # Logs
 logs
 *.log
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 pnpm-debug.log*
 lerna-debug.log*
 node_modules
 dist
 dist-ssr
 *.local
 # Editor directories and files
 .vscode/*
 !.vscode/extensions.json
 .idea
 .DS_Store
 *.suo
 *.ntvs*
 *.njsproj
 *.sln
 *.sw?
--- a/README.md
+++ b/README.md
@@ -1,2 +1,14 @@
 # ai_sandbox
 A sandbox for exploring the current AI tool landscape
 ## Subprojects
 ### [Speech to Speech AI Assistant](./speech-speech/)
 AI assistant chat with speech recognition and text-to-speech (TTS) responses
 Fullstack  
 - Vite, TS, React frontend
 - fastapi backend
 - OpenAI for LLM services
--- a/speech-speech/.gitignore
+++ b/speech-speech/.gitignore
@@ -1,24 +0,0 @@
 # Logs
 logs
 *.log
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 pnpm-debug.log*
 lerna-debug.log*
 node_modules
 dist
 dist-ssr
 *.local
 # Editor directories and files
 .vscode/*
 !.vscode/extensions.json
 .idea
 .DS_Store
 *.suo
 *.ntvs*
 *.njsproj
 *.sln
 *.sw?
--- a/speech-speech/README.md
+++ b/speech-speech/README.md
@@ -0,0 +1,35 @@
 # Speech to Speech AI Assistant
 AI assistant chat with speech recognition and text-to-speech (TTS) responses
 Fullstack  
 - Vite, TS, React frontend
 - fastapi backend
 - OpenAI for LLM services
 ## Requirements
 - python3
 - npm
 - OpenAI API token
 ## Setup
 ```
 cd frontend
 npm install
 npm run build
 cd ../backend
 # optionally set up a virtual environment of your choice
 python3 -m pip install -r requirements.txt
 ```
 ## Running
 Example `backend/.env`:
 ```
 OPENAI_API_KEY=<apikey>
 ```
 ```
 cd backend
 set -a && source .env && set +a  # export the variables so uvicorn can see them
 uvicorn --port 8080 api:app
 ```
--- a/speech-speech/backend/.env
+++ b/speech-speech/backend/.env
@@ -1 +0,0 @@
 OPENAI_API_KEY=<redacted>
--- a/speech-speech/backend/api.py
+++ b/speech-speech/backend/api.py
@@ -1,8 +1,9 @@
 from openai import OpenAI
 from fastapi import FastAPI, File, Response, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
 from io import BytesIO
 app = FastAPI()
@@ -26,9 +27,9 @@ class Conversation(BaseModel):
@app.post("/get-text")
-def stt(audio: bytes = File()):
+async def stt(audio: bytes = File()):
-    with open("audio.webm", "wb+") as f:
+    with BytesIO(audio) as f:
-        f.write(audio)
+        f.name = "audio.mp3"
        transcript = openAI_clinet.audio.transcriptions.create(
            model="whisper-1",
            file=f,
@@ -40,8 +41,6 @@ def stt(audio: bytes = File()):
@app.post("/conversation")
 async def get_next_response(request: Request):
    # role = "test"
    # res_msg = "temp test response"
    messages = await request.json()
    res = openAI_clinet.chat.completions.create(
        model="gpt-3.5-turbo",
@@ -57,10 +56,11 @@ async def get_next_response(request: Request):
@app.get("/speak")
 def tts(text: str):
    res = openAI_clinet.audio.speech.create(
-        model="tts-1",
+        model="tts-1", voice="nova", input=text, response_format="mp3"
        voice="nova",
        input=text,
        response_format='mp3'
    )
    # this works for now but I need to find a way to stream this to response
    return Response(content=res.content, media_type="audio/mp3")
 # if this is above other routes it will try and serve files instead of matching
 # the intended route
 app.mount("/", StaticFiles(directory="dist", html=True), name="static")
--- a/speech-speech/frontend/src/App.tsx
+++ b/speech-speech/frontend/src/App.tsx
@@ -2,6 +2,7 @@ import { useState } from "react";
 import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
 import "./App.css";
 let userAudio: Array<Blob> = [];
 let audioBlobs: Array<Blob> = [];
 let streamBeingCaptured: MediaStream | null = null;
 let mediaRecorder: MediaRecorder | null = null;
@@ -51,14 +52,6 @@ function playRecord() {
  audio.play();
 }
 function playMsg(msg: ChatMsg) {
  const audio = new Audio(
    "http://100.82.51.22:8001/speak?" +
    new URLSearchParams({ text: msg.content }),
  );
  console.log("loading audio and playing?");
  audio.play();
 }
 function App() {
  const [recordState, setRecordState] = useState(false);
  const [chatState, setChatState] = useState([{
@@ -77,18 +70,24 @@ function App() {
  }
  function sendAudio() {
-    var formData = new FormData();
+    let formData = new FormData();
-    formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" }));
+    let audio = new Blob(audioBlobs, { type: "audio/webm" });
-    fetch("http://100.82.51.22:8001/get-text", {
+    userAudio.push(audio);
    formData.append("audio", audio);
    fetch("/get-text", {
      "method": "POST",
      "body": formData,
    }).then((res) => res.json())
      .then((res) => {
        setChatState((curState: Array<ChatMsg>) => [
          ...curState,
-          { "role": "user", "content": res["user-transcript"] },
+          {
            "role": "user",
            "content": res["user-transcript"],
            "audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
          },
        ]);
-        fetch("http://100.82.51.22:8001/conversation", {
+        fetch("/conversation", {
          "method": "POST",
          "body": JSON.stringify([...chatState, {
            "role": "user",
@@ -98,9 +97,10 @@ function App() {
          .then((res) => {
            setChatState((
              curState: Array<ChatMsg>,
-            ) => [...curState, res]);
+            ) => [...curState, {
-            console.log("attempting to play result");
+              ...res,
-            playMsg(res);
+              "audio": "/speak?" + new URLSearchParams({ text: res.content }),
            }]);
          });
      });
  }
--- a/speech-speech/frontend/src/components.tsx
+++ b/speech-speech/frontend/src/components.tsx
@@ -6,10 +6,10 @@ import {
  TbPlayerStop,
 } from "react-icons/tb";
 export type ChatMsg = {
  role: string;
  content: string;
  audio?: string;
 };
 export function Header() {
@@ -59,6 +59,11 @@ export function Msg(props: { msg: ChatMsg }) {
      <span className="ml-8">
        {props.msg.content}
      </span>
      <audio
        controls
        autoPlay={props.msg.role == "assistant"}
        src={props.msg.audio}
      />
    </div>
  );
 }
--- a/speech-speech/frontend/vite.config.ts
+++ b/speech-speech/frontend/vite.config.ts
@@ -9,4 +9,7 @@ export default defineConfig({
      "Access-Control-Allow-Origin": '*',
    },
  },
 	build: {
 		outDir: '../backend/dist/',
 	},
 });
Author	SHA1	Message	Date
Andrei Stoica	8aa7bd2e99	cleanup	2024-02-28 12:10:49 -05:00
Andrei Stoica	3c0a9b150b	audio playback for both user and assistant	2024-02-28 11:55:34 -05:00
Andrei Stoica	5cc002a110	assistant response is replayable	2024-02-28 11:33:08 -05:00
Andrei Stoica	7562778f18	merged gitignores	2024-02-26 20:21:17 -05:00
Andrei Stoica	4fd825e1ae	fixing urls	2024-02-26 20:03:58 -05:00
Andrei Stoica	7acdbb3136	cleaning up readmes	2024-02-26 19:58:28 -05:00
Andrei Stoica	f1c2108bc7	updating readmes	2024-02-26 19:43:10 -05:00
Andrei Stoica	056e1067f4	building frontend and serving with fastapi	2024-02-26 13:17:26 -05:00
Andrei Stoica	1916185f19	cleaning up backend sins now	2024-02-26 11:54:00 -05:00
		`@@ -1 +0,0 @@`
			`OPENAI_API_KEY=<redacted>`