Compare commits

...

9 Commits

Author SHA1 Message Date
8aa7bd2e99 cleanup 2024-02-28 12:10:49 -05:00
3c0a9b150b audio playback for both user and assistant 2024-02-28 11:55:34 -05:00
5cc002a110 assistant response is replayable 2024-02-28 11:33:08 -05:00
7562778f18 merged gitignores 2024-02-26 20:21:17 -05:00
4fd825e1ae fixing urls 2024-02-26 20:03:58 -05:00
7acdbb3136 cleaning up readmes 2024-02-26 19:58:28 -05:00
f1c2108bc7 updating readmes 2024-02-26 19:43:10 -05:00
056e1067f4 building frontend and serving with fastapi 2024-02-26 13:17:26 -05:00
1916185f19 cleaning up backend sins now 2024-02-26 11:54:00 -05:00
9 changed files with 109 additions and 54 deletions

25
.gitignore vendored
View File

@@ -3,3 +3,28 @@
**/audio **/audio
*.mp3 *.mp3
*.webm *.webm
.env
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

View File

@@ -1,2 +1,14 @@
# ai_sandbox # ai_sandbox
A sandbox for exploring the current AI tool landscape
## Subprojects
### [Speech to Speech AI Assistant](./speech-speech/)
AI assistant chat with speech recognition and tts responses
Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services

View File

@@ -1,24 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

35
speech-speech/README.md Normal file
View File

@@ -0,0 +1,35 @@
# Speech to Speech AI Assistant
AI assistant chat with speech recognition and tts responses
Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services
## Requirements
- python3
- npm
- OpenAI API token
## Setup
```
cd frontend
npm install
npm run build
cd ../backend
# optionally setup virtual environment of your choice
python3 -m pip install -r requirements.txt
```
## Running
example `backend/.env`
```
OPENAI_API_KEY=<apikey>
```
```
cd backend
source .env
uvicorn --port 8080 api:app
```

View File

@@ -1 +0,0 @@
OPENAI_API_KEY=<redacted>

View File

@@ -1,8 +1,9 @@
from openai import OpenAI from openai import OpenAI
from fastapi import FastAPI, File, Response, Request from fastapi import FastAPI, File, Response, Request
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel from pydantic import BaseModel
from io import BytesIO
app = FastAPI() app = FastAPI()
@@ -26,9 +27,9 @@ class Conversation(BaseModel):
@app.post("/get-text") @app.post("/get-text")
def stt(audio: bytes = File()): async def stt(audio: bytes = File()):
with open("audio.webm", "wb+") as f: with BytesIO(audio) as f:
f.write(audio) f.name = "audio.mp3"
transcript = openAI_clinet.audio.transcriptions.create( transcript = openAI_clinet.audio.transcriptions.create(
model="whisper-1", model="whisper-1",
file=f, file=f,
@@ -40,8 +41,6 @@ def stt(audio: bytes = File()):
@app.post("/conversation") @app.post("/conversation")
async def get_next_response(request: Request): async def get_next_response(request: Request):
# role = "test"
# res_msg = "temp test response"
messages = await request.json() messages = await request.json()
res = openAI_clinet.chat.completions.create( res = openAI_clinet.chat.completions.create(
model="gpt-3.5-turbo", model="gpt-3.5-turbo",
@@ -57,10 +56,11 @@ async def get_next_response(request: Request):
@app.get("/speak") @app.get("/speak")
def tts(text: str): def tts(text: str):
res = openAI_clinet.audio.speech.create( res = openAI_clinet.audio.speech.create(
model="tts-1", model="tts-1", voice="nova", input=text, response_format="mp3"
voice="nova",
input=text,
response_format='mp3'
) )
# this works for now but I need to find a way to stream this to response
return Response(content=res.content, media_type="audio/mp3") return Response(content=res.content, media_type="audio/mp3")
# if this is above other routes it will try and serve files instead of matching
# the intended route
app.mount("/", StaticFiles(directory="dist", html=True), name="static")

View File

@@ -2,6 +2,7 @@ import { useState } from "react";
import { ChatMsg, Controls, Feed, Header } from "./components.tsx"; import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
import "./App.css"; import "./App.css";
let userAudio: Array<Blob> = [];
let audioBlobs: Array<Blob> = []; let audioBlobs: Array<Blob> = [];
let streamBeingCaptured: MediaStream | null = null; let streamBeingCaptured: MediaStream | null = null;
let mediaRecorder: MediaRecorder | null = null; let mediaRecorder: MediaRecorder | null = null;
@@ -51,14 +52,6 @@ function playRecord() {
audio.play(); audio.play();
} }
function playMsg(msg: ChatMsg) {
const audio = new Audio(
"http://100.82.51.22:8001/speak?" +
new URLSearchParams({ text: msg.content }),
);
console.log("loading audio and playing?");
audio.play();
}
function App() { function App() {
const [recordState, setRecordState] = useState(false); const [recordState, setRecordState] = useState(false);
const [chatState, setChatState] = useState([{ const [chatState, setChatState] = useState([{
@@ -77,18 +70,24 @@ function App() {
} }
function sendAudio() { function sendAudio() {
var formData = new FormData(); let formData = new FormData();
formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" })); let audio = new Blob(audioBlobs, { type: "audio/webm" });
fetch("http://100.82.51.22:8001/get-text", { userAudio.push(audio);
formData.append("audio", audio);
fetch("/get-text", {
"method": "POST", "method": "POST",
"body": formData, "body": formData,
}).then((res) => res.json()) }).then((res) => res.json())
.then((res) => { .then((res) => {
setChatState((curState: Array<ChatMsg>) => [ setChatState((curState: Array<ChatMsg>) => [
...curState, ...curState,
{ "role": "user", "content": res["user-transcript"] }, {
"role": "user",
"content": res["user-transcript"],
"audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
},
]); ]);
fetch("http://100.82.51.22:8001/conversation", { fetch("/conversation", {
"method": "POST", "method": "POST",
"body": JSON.stringify([...chatState, { "body": JSON.stringify([...chatState, {
"role": "user", "role": "user",
@@ -98,9 +97,10 @@ function App() {
.then((res) => { .then((res) => {
setChatState(( setChatState((
curState: Array<ChatMsg>, curState: Array<ChatMsg>,
) => [...curState, res]); ) => [...curState, {
console.log("attempting to play result"); ...res,
playMsg(res); "audio": "/speak?" + new URLSearchParams({ text: res.content }),
}]);
}); });
}); });
} }

View File

@@ -6,10 +6,10 @@ import {
TbPlayerStop, TbPlayerStop,
} from "react-icons/tb"; } from "react-icons/tb";
export type ChatMsg = { export type ChatMsg = {
role: string; role: string;
content: string; content: string;
audio?: string;
}; };
export function Header() { export function Header() {
@@ -59,6 +59,11 @@ export function Msg(props: { msg: ChatMsg }) {
<span className="ml-8"> <span className="ml-8">
{props.msg.content} {props.msg.content}
</span> </span>
<audio
controls
autoPlay={props.msg.role == "assistant"}
src={props.msg.audio}
/>
</div> </div>
); );
} }

View File

@@ -9,4 +9,7 @@ export default defineConfig({
"Access-Control-Allow-Origin": '*', "Access-Control-Allow-Origin": '*',
}, },
}, },
build: {
outDir: '../backend/dist/',
},
}); });