cleanup

audio playback for both user and assistant
assistant response is replayable
2024-02-28 12:10:49 -05:00 · 2024-02-28 11:55:34 -05:00 · 2024-02-28 11:33:08 -05:00 · 2024-02-26 20:21:17 -05:00 · 2024-02-26 20:03:58 -05:00 · 2024-02-26 19:58:28 -05:00
9 changed files with 109 additions and 54 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,28 @@
 **/audio
 *.mp3
 *.webm
+.env
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
--- a/README.md
+++ b/README.md
@@ -1,2 +1,14 @@
 # ai_sandbox

+A sandbox for learning about the current AI tool landscape.
+
+## Subprojects
+
+### [Speech to Speech AI Assistant](./speech-speech/)
+AI assistant chat with speech recognition and text-to-speech (TTS) responses
+
+Fullstack  
+- Vite, TS, React frontend
+- FastAPI backend
+- OpenAI for LLM services
+
--- a/speech-speech/.gitignore
+++ b/speech-speech/.gitignore
@@ -1,24 +0,0 @@
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-pnpm-debug.log*
-lerna-debug.log*
-
-node_modules
-dist
-dist-ssr
-*.local
-
-# Editor directories and files
-.vscode/*
-!.vscode/extensions.json
-.idea
-.DS_Store
-*.suo
-*.ntvs*
-*.njsproj
-*.sln
-*.sw?
--- a/speech-speech/README.md
+++ b/speech-speech/README.md
@@ -0,0 +1,35 @@
+# Speech to Speech AI Assistant
+AI assistant chat with speech recognition and text-to-speech (TTS) responses
+
+Fullstack  
+- Vite, TS, React frontend
+- FastAPI backend
+- OpenAI for LLM services
+
+## Requirements
+- python3
+- npm
+- OpenAI API token
+
+## Setup
+```
+cd frontend
+npm install
+npm run build
+
+cd ../backend
+# optionally set up a virtual environment of your choice
+python3 -m pip install -r requirements.txt
+```
+
+## Running
+Example `backend/.env`:
+```
+OPENAI_API_KEY=<apikey>
+```
+
+```
+cd backend
+source .env
+uvicorn --port 8080 api:app
+```
--- a/speech-speech/backend/.env
+++ b/speech-speech/backend/.env
@@ -1 +0,0 @@
-OPENAI_API_KEY=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
--- a/speech-speech/backend/api.py
+++ b/speech-speech/backend/api.py
@@ -1,8 +1,9 @@
 from openai import OpenAI
 from fastapi import FastAPI, File, Response, Request
+from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse
 from pydantic import BaseModel
+from io import BytesIO


 app = FastAPI()
@@ -26,22 +27,20 @@ class Conversation(BaseModel):


@app.post("/get-text")
-def stt(audio: bytes = File()):
-    with open("audio.webm", "wb+") as f:
-        f.write(audio)
+async def stt(audio: bytes = File()):
+    with BytesIO(audio) as f:
+        f.name = "audio.mp3"
        transcript = openAI_clinet.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text",
-         )
+        )
    data = {"len": len(audio), "user-transcript": transcript}
    return data


@app.post("/conversation")
 async def get_next_response(request: Request):
-    # role = "test"
-    # res_msg = "temp test response"
    messages = await request.json()
    res = openAI_clinet.chat.completions.create(
        model="gpt-3.5-turbo",
@@ -57,10 +56,11 @@ async def get_next_response(request: Request):
@app.get("/speak")
 def tts(text: str):
    res = openAI_clinet.audio.speech.create(
-        model="tts-1",
-        voice="nova",
-        input=text,
-        response_format='mp3'
+        model="tts-1", voice="nova", input=text, response_format="mp3"
    )
-    # this works for now but I need to find a way to stream this to response
    return Response(content=res.content, media_type="audio/mp3")
+
+
+# Keep this mount last: if it is registered above the other routes, it will
+# try to serve files instead of matching the intended route.
+app.mount("/", StaticFiles(directory="dist", html=True), name="static")
--- a/speech-speech/frontend/src/App.tsx
+++ b/speech-speech/frontend/src/App.tsx
@@ -2,6 +2,7 @@ import { useState } from "react";
 import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
 import "./App.css";

+let userAudio: Array<Blob> = [];
 let audioBlobs: Array<Blob> = [];
 let streamBeingCaptured: MediaStream | null = null;
 let mediaRecorder: MediaRecorder | null = null;
@@ -51,14 +52,6 @@ function playRecord() {
  audio.play();
 }

-function playMsg(msg: ChatMsg) {
-  const audio = new Audio(
-    "http://100.82.51.22:8001/speak?" +
-    new URLSearchParams({ text: msg.content }),
-  );
-  console.log("loading audio and playing?");
-  audio.play();
-}
 function App() {
  const [recordState, setRecordState] = useState(false);
  const [chatState, setChatState] = useState([{
@@ -77,18 +70,24 @@ function App() {
  }

  function sendAudio() {
-    var formData = new FormData();
-    formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" }));
-    fetch("http://100.82.51.22:8001/get-text", {
+    let formData = new FormData();
+    let audio = new Blob(audioBlobs, { type: "audio/webm" });
+    userAudio.push(audio);
+    formData.append("audio", audio);
+    fetch("/get-text", {
      "method": "POST",
      "body": formData,
    }).then((res) => res.json())
      .then((res) => {
        setChatState((curState: Array<ChatMsg>) => [
          ...curState,
-          { "role": "user", "content": res["user-transcript"] },
+          {
+            "role": "user",
+            "content": res["user-transcript"],
+            "audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
+          },
        ]);
-        fetch("http://100.82.51.22:8001/conversation", {
+        fetch("/conversation", {
          "method": "POST",
          "body": JSON.stringify([...chatState, {
            "role": "user",
@@ -98,9 +97,10 @@ function App() {
          .then((res) => {
            setChatState((
              curState: Array<ChatMsg>,
-            ) => [...curState, res]);
-            console.log("attempting to play result");
-            playMsg(res);
+            ) => [...curState, {
+              ...res,
+              "audio": "/speak?" + new URLSearchParams({ text: res.content }),
+            }]);
          });
      });
  }
--- a/speech-speech/frontend/src/components.tsx
+++ b/speech-speech/frontend/src/components.tsx
@@ -6,10 +6,10 @@ import {
  TbPlayerStop,
 } from "react-icons/tb";

-
 export type ChatMsg = {
  role: string;
  content: string;
+  audio?: string;
 };

 export function Header() {
@@ -59,6 +59,11 @@ export function Msg(props: { msg: ChatMsg }) {
      <span className="ml-8">
        {props.msg.content}
      </span>
+      <audio
+        controls
+        autoPlay={props.msg.role == "assistant"}
+        src={props.msg.audio}
+      />
    </div>
  );
 }
--- a/speech-speech/frontend/vite.config.ts
+++ b/speech-speech/frontend/vite.config.ts
@@ -9,4 +9,7 @@ export default defineConfig({
      "Access-Control-Allow-Origin": '*',
    },
  },
+	build: {
+		outDir: '../backend/dist/',
+	},
 });
Author	SHA1	Message	Date
Andrei Stoica	8aa7bd2e99	cleanup	2024-02-28 12:10:49 -05:00
Andrei Stoica	3c0a9b150b	audio playback for both user and assistant	2024-02-28 11:55:34 -05:00
Andrei Stoica	5cc002a110	assistant response is replayable	2024-02-28 11:33:08 -05:00
Andrei Stoica	7562778f18	merged gitignores	2024-02-26 20:21:17 -05:00
Andrei Stoica	4fd825e1ae	fixing urls	2024-02-26 20:03:58 -05:00
Andrei Stoica	7acdbb3136	cleaning up readmes	2024-02-26 19:58:28 -05:00
Andrei Stoica	f1c2108bc7	updating readmes	2024-02-26 19:43:10 -05:00
Andrei Stoica	056e1067f4	building frontend and serving with fastapi	2024-02-26 13:17:26 -05:00
Andrei Stoica	1916185f19	cleaning up backend sins now	2024-02-26 11:54:00 -05:00
				`@@ -1 +0,0 @@`
				`OPENAI_API_KEY=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`