Compare commits

..

15 Commits

Author SHA1 Message Date
8aa7bd2e99 cleanup 2024-02-28 12:10:49 -05:00
3c0a9b150b audio playback for both user and assistant 2024-02-28 11:55:34 -05:00
5cc002a110 assistant response is replayable 2024-02-28 11:33:08 -05:00
7562778f18 merged gitignores 2024-02-26 20:21:17 -05:00
4fd825e1ae fixing urls 2024-02-26 20:03:58 -05:00
7acdbb3136 cleaning up readmes 2024-02-26 19:58:28 -05:00
f1c2108bc7 updating readmes 2024-02-26 19:43:10 -05:00
056e1067f4 building frontend and serving with fastapi 2024-02-26 13:17:26 -05:00
1916185f19 cleaning up backend sins now 2024-02-26 11:54:00 -05:00
64bb9f9db3 cleaning up my sins 2024-02-26 11:31:01 -05:00
42c605d992 cleanup 2024-02-25 17:47:25 -05:00
b7787be635 file response streamlining 2024-02-25 17:09:35 -05:00
ebcfa7e19e playing back response 2024-02-25 13:40:08 -05:00
baab95660b cors 2024-02-24 17:58:26 -05:00
8af852d82c tts backend 2024-02-20 19:35:36 -05:00
11 changed files with 260 additions and 158 deletions

28
.gitignore vendored
View File

@@ -1,2 +1,30 @@
**/.venv/
**/__pycache__/
**/audio
*.mp3
*.webm
.env
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

View File

@@ -1,2 +1,14 @@
# ai_sandbox
A sandbox for learning about the current AI tool landscape
## Subprojects
### [Speech to Speech AI Assistant](./speech-speech/)
AI assistant chat with speech recognition and tts responses
Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services

View File

@@ -1,24 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

35
speech-speech/README.md Normal file
View File

@@ -0,0 +1,35 @@
# Speech to Speech AI Assistant
AI assistant chat with speech recognition and tts responses
Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services
## Requirements
- python3
- npm
- OpenAI API token
## Setup
```
cd frontend
npm install
npm run build
cd ../backend
# optionally setup virtual environment of your choice
python3 -m pip install -r requirements.txt
```
## Running
Example `backend/.env`:
```
OPENAI_API_KEY=<apikey>
```
```
cd backend
# export every variable defined in .env so the uvicorn process can read it
set -a; source .env; set +a
uvicorn --port 8080 api:app
```

View File

@@ -1 +0,0 @@
OPENAI_API_KEY=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

View File

@@ -1,12 +1,20 @@
from openai import OpenAI
from fastapi import FastAPI, File, Response, Request
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import whisper
from io import BytesIO
app = FastAPI()
openAI_clinet = OpenAI()
model = whisper.load_model("base")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
class ConversationMessege(BaseModel):
@@ -19,26 +27,20 @@ class Conversation(BaseModel):
@app.post("/get-text")
def get_text(response: Response, audio: bytes = File()):
response.headers["Access-Control-Allow-Origin"] = "*"
with open("audio", "wb") as f:
f.write(audio)
# transcript = openAI_clinet.audio.transcriptions.create(
# model="whisper-1",
# file=audio,
# response_format="text",
# RequestBody
# )
result = model.transcribe("audio")
data = {"len": len(audio), "user-transcript": result["text"]}
async def stt(audio: bytes = File()):
with BytesIO(audio) as f:
f.name = "audio.mp3"
transcript = openAI_clinet.audio.transcriptions.create(
model="whisper-1",
file=f,
response_format="text",
)
data = {"len": len(audio), "user-transcript": transcript}
return data
@app.post("/conversation")
async def get_next_response(request: Request, response: Response):
response.headers["Access-Control-Allow-Origin"] = "*"
#role = "test"
#res_msg = "temp test response"
async def get_next_response(request: Request):
messages = await request.json()
res = openAI_clinet.chat.completions.create(
model="gpt-3.5-turbo",
@@ -49,3 +51,16 @@ async def get_next_response(request: Request, response: Response):
print(messages)
print(res_msg)
return {"role": role, "content": res_msg}
@app.get("/speak")
def tts(text: str):
    """Convert ``text`` to speech and return the result as MP3 audio.

    Args:
        text: The text to synthesize, taken from the ``text`` query parameter.

    Returns:
        A ``Response`` whose body is the MP3 bytes produced by the OpenAI
        speech endpoint (model ``tts-1``, voice ``nova``).
    """
    res = openAI_clinet.audio.speech.create(
        model="tts-1", voice="nova", input=text, response_format="mp3"
    )
    # "audio/mpeg" is the registered media type for MP3 data; "audio/mp3" is
    # a non-standard alias that not every client recognises.
    return Response(content=res.content, media_type="audio/mpeg")
# Mount the static frontend last: if this catch-all mount at "/" is registered
# above the other routes, it will try to serve files instead of matching the
# intended API route.
app.mount("/", StaticFiles(directory="dist", html=True), name="static")

Binary file not shown.

View File

@@ -2,9 +2,8 @@
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vite + React + TS</title>
<title>Speech to Speech AI example</title>
</head>
<body>
<div id="root"></div>

View File

@@ -1,47 +1,24 @@
import { useEffect, useRef, useState } from "react";
import {
TbBrandOpenai,
TbMicrophone2,
TbPlayerPlay,
TbPlayerStop,
} from "react-icons/tb";
import { useState } from "react";
import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
import "./App.css";
type ChatMsg = {
role: string;
content: string;
};
function Header() {
return (
<header className="header p-3">
<div className="title text-5xl font-extrabold">
Speach to Speech AI example
</div>
</header>
);
}
let audioBlobs = [];
let userAudio: Array<Blob> = [];
let audioBlobs: Array<Blob> = [];
let streamBeingCaptured: MediaStream | null = null;
let mediaRecorder: MediaRecorder | null = null;
let chat: Array<ChatMsg> = [{
role: "system",
content: "You are a helpful assistant.",
}];
function get_mic() {
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
console.log("getUserMedia supported.");
return navigator.mediaDevices.getUserMedia({ audio: true });
} else {
console.log("getUserMedia not supported on your browser!");
}
throw "getUserMedia not supported on your browser!";
}
function startRecord() {
audioBlobs = [];
get_mic().then((stream) => {
console.log("got mic");
streamBeingCaptured = stream;
mediaRecorder = new MediaRecorder(stream);
console.log("Starting Recording");
@@ -53,6 +30,12 @@ function startRecord() {
}
function stopRecord() {
if (!mediaRecorder) {
throw "MediaRecorder not set";
}
if (!streamBeingCaptured) {
throw "Stream not set";
}
mediaRecorder.stop();
streamBeingCaptured.getTracks()
.forEach((track) => track.stop());
@@ -69,46 +52,12 @@ function playRecord() {
audio.play();
}
function Feed(props: { chat: Array[ChatMsg]; setChatStateFn: any }) {
const bottomRef = useRef(null);
const scrollToBottom = () => {
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
};
useEffect(() => {
scrollToBottom();
console.log("scroll?");
});
return (
<div className="feed grow self-center w-5/6 max-w-screen-lg px-6 py-3 overflow-scroll">
<div className="content-center space-y-2 divide-y-4">
{props.chat.filter((m: ChatMsg) => m.role != "system").map((
m: ChatMsg,
) => <Msg msg={m} />)}
</div>
<div ref={bottomRef} />
</div>
);
}
function Msg(props: { msg: ChatMsg }) {
return (
<div className="Messege text-lg">
<span className="font-bold">
{props.msg.role.toUpperCase()}:
</span>
<br />
<span className="ml-8">
{props.msg.content}
</span>
</div>
);
}
function Controls(props: { setChatStateFn: any; chat: Array[ChatMsg] }) {
function App() {
const [recordState, setRecordState] = useState(false);
const [chatState, setChatState] = useState([{
role: "system",
content: "You are a helpful assistant.",
}]);
function toggleRecord() {
if (recordState == false) {
@@ -121,65 +70,40 @@ function Controls(props: { setChatStateFn: any; chat: Array[ChatMsg] }) {
}
function sendAudio() {
var formData = new FormData();
formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" }));
fetch("http://100.82.51.22:8001/get-text", {
let formData = new FormData();
let audio = new Blob(audioBlobs, { type: "audio/webm" });
userAudio.push(audio);
formData.append("audio", audio);
fetch("/get-text", {
"method": "POST",
"body": formData,
}).then((res) => res.json())
.then((res) => {
console.log(res);
props.setChatStateFn((curState) => [
setChatState((curState: Array<ChatMsg>) => [
...curState,
{ "role": "user", "content": res["user-transcript"] },
{
"role": "user",
"content": res["user-transcript"],
"audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
},
]);
fetch("http://100.82.51.22:8001/conversation", {
fetch("/conversation", {
"method": "POST",
"body": JSON.stringify([...props.chat, {
"body": JSON.stringify([...chatState, {
"role": "user",
"content": res["user-transcript"],
}]),
}).then((res) => res.json())
.then((res) => {
props.setChatStateFn((curState) => [...curState, res]);
});
});
}
return (
<div className="controls self-center flex justify-evenly p-5 text-5xl border-2 border-b-0 w-1/2 max-w-screen-sm min-w-fit">
<button
onClick={() => toggleRecord()}
className={"inline-flex " + (recordState ? "text-red-500" : "")}
>
{recordState ? <TbPlayerStop /> : <TbMicrophone2 />}
{recordState ? "STOP" : "REC"}
</button>
<button
onClick={() => playRecord()}
className="inline-flex text-green-500"
>
<TbPlayerPlay /> PLAY
</button>
<button
onClick={() => {
sendAudio();
}}
className="inline-flex"
>
<TbBrandOpenai /> SEND
</button>
</div>
);
}
function App() {
const [chatState, setChatState] = useState([{
role: "system",
content: "You are a helpful assistant.",
setChatState((
curState: Array<ChatMsg>,
) => [...curState, {
...res,
"audio": "/speak?" + new URLSearchParams({ text: res.content }),
}]);
});
});
}
return (
<>
@@ -189,7 +113,12 @@ function App() {
<hr className="mx-3 border-t-4" />
</div>
<Feed chat={chatState} setChatStateFn={setChatState} />
<Controls setChatStateFn={setChatState} chat={chatState} />
<Controls
recButtonOnClick={toggleRecord}
recordState={recordState}
playButtonOnClick={playRecord}
sendButtonOnClick={sendAudio}
/>
</div>
</>
);

View File

@@ -0,0 +1,106 @@
import { useEffect, useRef } from "react";
import {
TbBrandOpenai,
TbMicrophone2,
TbPlayerPlay,
TbPlayerStop,
} from "react-icons/tb";
/**
 * One entry in the conversation.
 */
export interface ChatMsg {
  /** Who produced the message, e.g. "system", "user" or "assistant". */
  role: string;
  /** The text of the message. */
  content: string;
  /** Optional URL of an audio rendition of the message. */
  audio?: string;
}
/**
 * Page banner that displays the application title.
 */
export function Header() {
  return (
    <header className="header p-3">
      <div className="title text-5xl font-extrabold">
        Speech to Speech AI example
      </div>
    </header>
  );
}
/**
 * Scrollable list of conversation messages.
 *
 * Renders every message whose role is not "system" and smoothly scrolls the
 * end of the list into view whenever the `chat` prop changes.
 *
 * @param props.chat - The full conversation, including any system message.
 * @param props.setChatStateFn - Accepted for caller compatibility; this
 *   component does not use it.
 */
export function Feed(props: { chat: Array<ChatMsg>; setChatStateFn: any }) {
  // Empty element placed after the messages; used as the scroll target.
  const bottomRef = useRef<HTMLDivElement>(null);

  // Depend on `props.chat` so unrelated re-renders (e.g. the parent toggling
  // its record state) do not trigger another scroll.
  useEffect(() => {
    bottomRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [props.chat]);

  const visibleMessages = props.chat.filter(
    (m: ChatMsg) => m.role !== "system",
  );

  return (
    <div className="feed grow self-center w-5/6 max-w-screen-lg px-6 py-3 overflow-scroll">
      <div className="content-center space-y-2 divide-y-4">
        {visibleMessages.map((m: ChatMsg, i: number) => (
          <Msg key={i} msg={m} />
        ))}
      </div>
      <div ref={bottomRef} />
    </div>
  );
}
/**
 * A single chat message: the upper-cased role, the message text and, when the
 * message carries an audio URL, a player for it.
 *
 * @param props.msg - The message to display.
 */
export function Msg(props: { msg: ChatMsg }) {
  const { role, content, audio } = props.msg;
  return (
    <div className="Messege text-lg">
      <span className="font-bold">
        {role.toUpperCase()}:
      </span>
      <br />
      <span className="ml-8">
        {content}
      </span>
      {/* `audio` is optional on ChatMsg; skip the player rather than render
          an <audio> element with no source. */}
      {audio
        ? (
          <audio
            controls
            // Only assistant replies start playing on their own.
            autoPlay={role === "assistant"}
            src={audio}
          />
        )
        : null}
    </div>
  );
}
/**
 * Row of record / play / send buttons.
 *
 * The component holds no state of its own: the recording flag and all click
 * handlers are supplied by the parent.
 *
 * @param props.recButtonOnClick - Called when the REC/STOP button is clicked.
 * @param props.recordState - When true, the first button is shown in red with
 *   the stop icon and the label "STOP"; otherwise it shows the microphone
 *   icon and "REC".
 * @param props.playButtonOnClick - Called when the PLAY button is clicked.
 * @param props.sendButtonOnClick - Called when the SEND button is clicked.
 */
export function Controls(
  props: {
    // Concrete callback types instead of the unchecked `Function`, and the
    // primitive `boolean` instead of the boxed `Boolean` wrapper (a `Boolean`
    // object is always truthy, which would break the checks below).
    recButtonOnClick: () => void;
    recordState: boolean;
    playButtonOnClick: () => void;
    sendButtonOnClick: () => void;
  },
) {
  const { recordState } = props;
  return (
    <div className="controls self-center flex justify-evenly p-5 text-5xl border-2 border-b-0 w-1/2 max-w-screen-sm min-w-fit">
      <button
        // Wrapped in arrows so the handlers are called without the click event.
        onClick={() => props.recButtonOnClick()}
        className={"inline-flex " + (recordState ? "text-red-500" : "")}
      >
        {recordState ? <TbPlayerStop /> : <TbMicrophone2 />}
        {recordState ? "STOP" : "REC"}
      </button>
      <button
        onClick={() => props.playButtonOnClick()}
        className="inline-flex text-green-500"
      >
        <TbPlayerPlay /> PLAY
      </button>
      <button
        onClick={() => props.sendButtonOnClick()}
        className="inline-flex"
      >
        <TbBrandOpenai /> SEND
      </button>
    </div>
  );
}

View File

@@ -9,4 +9,7 @@ export default defineConfig({
"Access-Control-Allow-Origin": '*',
},
},
build: {
outDir: '../backend/dist/',
},
});