Compare commits

...

19 Commits

Author SHA1 Message Date
8aa7bd2e99 cleanup 2024-02-28 12:10:49 -05:00
3c0a9b150b audio playback for both user and assistant 2024-02-28 11:55:34 -05:00
5cc002a110 assistant response is replayable 2024-02-28 11:33:08 -05:00
7562778f18 merged gitignores 2024-02-26 20:21:17 -05:00
4fd825e1ae fixing urls 2024-02-26 20:03:58 -05:00
7acdbb3136 cleaning up readmes 2024-02-26 19:58:28 -05:00
f1c2108bc7 updating readmes 2024-02-26 19:43:10 -05:00
056e1067f4 building frontend and serving with fastapi 2024-02-26 13:17:26 -05:00
1916185f19 cleaning up backend sins now 2024-02-26 11:54:00 -05:00
64bb9f9db3 cleaning up my sins 2024-02-26 11:31:01 -05:00
42c605d992 cleanup 2024-02-25 17:47:25 -05:00
b7787be635 file response streamlining 2024-02-25 17:09:35 -05:00
ebcfa7e19e playing back response 2024-02-25 13:40:08 -05:00
baab95660b cors 2024-02-24 17:58:26 -05:00
8af852d82c tts backend 2024-02-20 19:35:36 -05:00
c8fa61e0c3 chat history 2024-02-20 17:29:37 -05:00
6504aeba0d sending conversation to backend 2024-02-20 11:31:36 -05:00
70da8a4ddb backend to interface with ai models 2024-02-20 11:31:01 -05:00
e20c6f04ba moved frontend 2024-02-19 21:50:15 -05:00
25 changed files with 396 additions and 145 deletions

View File

@@ -1,3 +1,9 @@
**/.venv/
**/__pycache__/
**/audio
*.mp3
*.webm
.env
# Logs # Logs
logs logs
*.log *.log

View File

@@ -1,2 +1,14 @@
# ai_sandbox # ai_sandbox
A sandbox for learning about the current AI tool landscape
## Subprojects
### [Speech to Speech AI Assistant](./speech-speech/)
AI assistant chat with speech recognition and tts responses
Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services

View File

@@ -1,30 +1,35 @@
# React + TypeScript + Vite # Speech to Speech AI Assistant
AI assistant chat with speech recognition and tts responses
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. Fullstack
- Vite, TS, React frontend
- fastapi backend
- OpenAI for LLM services
Currently, two official plugins are available: ## Requirements
- python3
- npm
- OpenAI API token
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh ## Setup
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh ```
cd frontend
npm install
npm run build
## Expanding the ESLint configuration cd ../backend
# optionally setup virtual environment of your choice
If you are developing a production application, we recommend updating the configuration to enable type aware lint rules: python3 -m pip install -r requirements.txt
- Configure the top-level `parserOptions` property like this:
```js
export default {
// other rules...
parserOptions: {
ecmaVersion: 'latest',
sourceType: 'module',
project: ['./tsconfig.json', './tsconfig.node.json'],
tsconfigRootDir: __dirname,
},
}
``` ```
- Replace `plugin:@typescript-eslint/recommended` to `plugin:@typescript-eslint/recommended-type-checked` or `plugin:@typescript-eslint/strict-type-checked` ## Running
- Optionally add `plugin:@typescript-eslint/stylistic-type-checked` example `backend/.env`
- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and add `plugin:react/recommended` & `plugin:react/jsx-runtime` to the `extends` list ```
export OPENAI_API_KEY=<apikey>
```
```
cd backend
source .env
uvicorn --port 8080 api:app
```

View File

@@ -0,0 +1,66 @@
from openai import OpenAI
from fastapi import FastAPI, File, Response, Request
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from io import BytesIO
# FastAPI application that the route decorators below register against.
app = FastAPI()
# Shared OpenAI SDK client, constructed with no explicit arguments.
# NOTE(review): presumably reads its credentials from the environment — confirm.
openAI_clinet = OpenAI()
# CORS is wide open: every origin, method and header is allowed.
# NOTE(review): presumably for a separately served dev frontend — confirm
# before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class ConversationMessege(BaseModel):
    """A single chat message: who said it and what was said."""

    # Speaker of the message.
    role: str
    # Text of the message.
    content: str
class Conversation(BaseModel):
    """An ordered list of chat messages."""

    # Messages of the conversation.
    messages: list[ConversationMessege]
@app.post("/get-text")
def stt(audio: bytes = File()):
    """Transcribe an uploaded audio recording with the OpenAI API.

    Declared as a plain ``def`` rather than ``async def``: the OpenAI client
    call below is synchronous, and FastAPI runs non-async handlers in a
    worker thread, so a slow transcription does not stall the event loop.

    Args:
        audio: Raw bytes of the ``audio`` multipart form field.

    Returns:
        A dict with ``len`` (size of the upload in bytes) and
        ``user-transcript`` (the transcription returned by the API).
    """
    with BytesIO(audio) as f:
        # Give the in-memory stream a file name before handing it to the SDK.
        # NOTE(review): presumably the name is used to infer the audio
        # format; the bundled frontend uploads audio/webm, not mp3 — confirm.
        f.name = "audio.mp3"
        transcript = openAI_clinet.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text",
        )
    return {"len": len(audio), "user-transcript": transcript}
@app.post("/conversation")
async def get_next_response(request: Request):
    """Return the assistant's next message for a conversation.

    The request body must be a JSON array of message objects. Only the
    ``role`` and ``content`` of each message are forwarded to the chat API,
    so client-side extras (the frontend attaches an ``audio`` URL to each
    message) never reach it.

    Returns:
        A dict with the ``role`` and ``content`` of the first completion
        choice.

    Raises:
        HTTPException: 400 when the body is not an array of objects that
            each carry ``role`` and ``content``.
    """
    # Imported locally so this handler does not depend on edits to the
    # module's import block.
    from fastapi import HTTPException
    from fastapi.concurrency import run_in_threadpool

    payload = await request.json()
    try:
        messages = [
            {"role": m["role"], "content": m["content"]} for m in payload
        ]
    except (KeyError, TypeError) as err:
        raise HTTPException(
            status_code=400,
            detail="Body must be a JSON array of {role, content} objects.",
        ) from err

    # The SDK call is synchronous; run it in a worker thread so the event
    # loop keeps serving other requests while the completion is generated.
    res = await run_in_threadpool(
        openAI_clinet.chat.completions.create,
        model="gpt-3.5-turbo",
        messages=messages,
    )
    reply = res.choices[0].message
    print(messages)
    print(reply.content)
    return {"role": reply.role, "content": reply.content}
@app.get("/speak")
def tts(text: str):
    """Synthesize *text* to speech and return the audio as an MP3 response.

    Args:
        text: The text to speak, taken from the ``text`` query parameter.

    Returns:
        A ``Response`` whose body is the MP3 bytes returned by the API.
    """
    res = openAI_clinet.audio.speech.create(
        model="tts-1", voice="nova", input=text, response_format="mp3"
    )
    # "audio/mpeg" is the registered media type for MP3 (RFC 3003);
    # "audio/mp3" is not a registered type.
    return Response(content=res.content, media_type="audio/mpeg")
# Keep this mount last: if it were declared above the other routes it would
# try to serve files for their paths instead of matching the intended route.
app.mount("/", StaticFiles(directory="dist", html=True), name="static")

View File

@@ -0,0 +1,4 @@
openai
fastapi
uvicorn
python-multipart

View File

@@ -0,0 +1,30 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
- Configure the top-level `parserOptions` property like this:
```js
export default {
// other rules...
parserOptions: {
ecmaVersion: 'latest',
sourceType: 'module',
project: ['./tsconfig.json', './tsconfig.node.json'],
tsconfigRootDir: __dirname,
},
}
```
- Replace `plugin:@typescript-eslint/recommended` to `plugin:@typescript-eslint/recommended-type-checked` or `plugin:@typescript-eslint/strict-type-checked`
- Optionally add `plugin:@typescript-eslint/stylistic-type-checked`
- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and add `plugin:react/recommended` & `plugin:react/jsx-runtime` to the `extends` list

View File

@@ -2,9 +2,8 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vite + React + TS</title> <title>Speech to Speech AI example</title>
</head> </head>
<body> <body>
<div id="root"></div> <div id="root"></div>

View File

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@@ -0,0 +1,127 @@
import { useState } from "react";
import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
import "./App.css";
// Blob of every recording that has been sent, in send order.
let userAudio: Array<Blob> = [];
// Chunks of the current recording; reset when a recording starts and filled
// by the recorder's "dataavailable" listener.
let audioBlobs: Array<Blob> = [];
// Microphone stream of the recording in progress, or null when idle.
let streamBeingCaptured: MediaStream | null = null;
// Recorder of the recording in progress, or null when idle.
let mediaRecorder: MediaRecorder | null = null;
// Ask the browser for an audio-only microphone stream.
// Returns the getUserMedia promise; throws a string when the API is missing.
function get_mic() {
  const devices = navigator.mediaDevices;
  if (devices && devices.getUserMedia) {
    console.log("getUserMedia supported.");
    const constraints = { audio: true };
    return devices.getUserMedia(constraints);
  }
  throw "getUserMedia not supported on your browser!";
}
// Start a fresh recording: clear the previous chunks, acquire the microphone
// and collect the recorder's data into `audioBlobs`.
function startRecord() {
  audioBlobs = [];
  get_mic()
    .then((stream) => {
      console.log("got mic");
      streamBeingCaptured = stream;
      mediaRecorder = new MediaRecorder(stream);
      console.log("Starting Recording");
      mediaRecorder.addEventListener("dataavailable", (event) => {
        audioBlobs.push(event.data);
      });
      mediaRecorder.start();
    })
    .catch((err) => {
      // A denied or unavailable microphone rejects the promise; report it
      // instead of leaving an unhandled rejection.
      console.error("Could not start recording:", err);
    });
}
// Stop the recording in progress and release the microphone.
// Throws a string when no recorder or stream is currently set.
function stopRecord() {
  if (!mediaRecorder) {
    throw "MediaRecorder not set";
  }
  if (!streamBeingCaptured) {
    throw "Stream not set";
  }
  mediaRecorder.stop();
  // Stop every track of the captured stream.
  streamBeingCaptured.getTracks()
    .forEach((track) => track.stop());
  mediaRecorder = null;
  streamBeingCaptured = null;
  // The original logged "Starting Recording" here, which was misleading.
  console.log("Stopping Recording");
  console.log(audioBlobs);
}
// Play back the chunks of the most recent recording.
function playRecord() {
  const audioBlob = new Blob(audioBlobs, { type: "audio/webm" });
  const audioUrl = URL.createObjectURL(audioBlob);
  const audio = new Audio(audioUrl);
  // Release the object URL once playback has finished so repeated plays do
  // not accumulate blob URLs.
  audio.addEventListener("ended", () => URL.revokeObjectURL(audioUrl), {
    once: true,
  });
  audio.play();
}
// Root component: owns the recording flag and the chat history, and wires the
// record / play / send controls to the backend endpoints.
function App() {
  const [recordState, setRecordState] = useState(false);
  const [chatState, setChatState] = useState<Array<ChatMsg>>([{
    role: "system",
    content: "You are a helpful assistant.",
  }]);

  // Start recording when idle, stop when recording.
  function toggleRecord() {
    if (!recordState) {
      startRecord();
      setRecordState(true);
    } else {
      stopRecord();
      setRecordState(false);
    }
  }

  // Upload the latest recording for transcription, append the transcript to
  // the chat, then request and append the assistant's reply.
  async function sendAudio() {
    const audio = new Blob(audioBlobs, { type: "audio/webm" });
    userAudio.push(audio);
    const formData = new FormData();
    formData.append("audio", audio);

    // Only role/content are sent to the backend; `audio` is a client-side
    // playback URL and means nothing to the chat endpoint.
    const history = chatState.map(({ role, content }) => ({ role, content }));

    try {
      const sttRes = await fetch("/get-text", {
        method: "POST",
        body: formData,
      });
      if (!sttRes.ok) {
        throw new Error(`/get-text failed with status ${sttRes.status}`);
      }
      const transcript = (await sttRes.json())["user-transcript"];
      const userMsg = { role: "user", content: transcript };

      // Create the object URL outside the state updater so it is created
      // exactly once for this recording.
      const userAudioUrl = URL.createObjectURL(audio);
      setChatState((curState) => [
        ...curState,
        { ...userMsg, audio: userAudioUrl },
      ]);

      const chatRes = await fetch("/conversation", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify([...history, userMsg]),
      });
      if (!chatRes.ok) {
        throw new Error(`/conversation failed with status ${chatRes.status}`);
      }
      const reply = await chatRes.json();
      setChatState((curState) => [...curState, {
        ...reply,
        audio: "/speak?" + new URLSearchParams({ text: reply.content }).toString(),
      }]);
    } catch (err) {
      console.error("Sending audio failed:", err);
    }
  }

  return (
    <>
      <div className="h-screen center flex flex-col">
        <div className="w-full max-w-screen-xl self-center">
          <Header />
          <hr className="mx-3 border-t-4" />
        </div>
        <Feed chat={chatState} setChatStateFn={setChatState} />
        <Controls
          recButtonOnClick={toggleRecord}
          recordState={recordState}
          playButtonOnClick={playRecord}
          sendButtonOnClick={sendAudio}
        />
      </div>
    </>
  );
}
export default App;

View File

Before

Width:  |  Height:  |  Size: 4.0 KiB

After

Width:  |  Height:  |  Size: 4.0 KiB

View File

@@ -0,0 +1,106 @@
import { useEffect, useRef } from "react";
import {
TbBrandOpenai,
TbMicrophone2,
TbPlayerPlay,
TbPlayerStop,
} from "react-icons/tb";
// One entry of the chat history.
export type ChatMsg = {
  // Speaker of the message, e.g. "system", "user" or "assistant".
  role: string;
  // Text of the message.
  content: string;
  // Optional URL used as the source of the message's <audio> element.
  audio?: string;
};
// Page header showing the application title.
export function Header() {
  return (
    <header className="header p-3">
      <div className="title text-5xl font-extrabold">
        Speech to Speech AI example
      </div>
    </header>
  );
}
// Scrollable chat feed. Renders every non-system message and keeps the view
// pinned to the newest one.
export function Feed(props: { chat: Array<ChatMsg>; setChatStateFn: any }) {
  // Empty element after the last message, used as the scroll target.
  const bottomRef = useRef<HTMLDivElement>(null);

  // Scroll only when the chat changes, not on every render of the parent
  // (e.g. when the record button toggles).
  useEffect(() => {
    bottomRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [props.chat]);

  return (
    <div className="feed grow self-center w-5/6 max-w-screen-lg px-6 py-3 overflow-scroll">
      <div className="content-center space-y-2 divide-y-4">
        {props.chat
          .filter((m: ChatMsg) => m.role != "system")
          .map((m: ChatMsg, i: number) => <Msg key={i} msg={m} />)}
      </div>
      <div ref={bottomRef} />
    </div>
  );
}
// A single chat message: upper-cased speaker label, text, and an audio player
// that starts automatically for assistant messages.
export function Msg(props: { msg: ChatMsg }) {
  const { role, content, audio } = props.msg;
  const speakerLabel = role.toUpperCase();
  const isAssistant = role == "assistant";
  return (
    <div className="Messege text-lg">
      <span className="font-bold">
        {speakerLabel}:
      </span>
      <br />
      <span className="ml-8">
        {content}
      </span>
      <audio controls autoPlay={isAssistant} src={audio} />
    </div>
  );
}
// Record / play / send button bar.
// The callbacks are typed as `() => void` and the flag as the primitive
// `boolean`, rather than the loose `Function` / `Boolean` wrapper types.
export function Controls(
  props: {
    recButtonOnClick: () => void;
    recordState: boolean;
    playButtonOnClick: () => void;
    sendButtonOnClick: () => void;
  },
) {
  return (
    <div className="controls self-center flex justify-evenly p-5 text-5xl border-2 border-b-0 w-1/2 max-w-screen-sm min-w-fit">
      {/* Record toggle: turns red and shows STOP while recording. */}
      <button
        onClick={() => props.recButtonOnClick()}
        className={"inline-flex " + (props.recordState ? "text-red-500" : "")}
      >
        {props.recordState ? <TbPlayerStop /> : <TbMicrophone2 />}
        {props.recordState ? "STOP" : "REC"}
      </button>
      <button
        onClick={() => props.playButtonOnClick()}
        className="inline-flex text-green-500"
      >
        <TbPlayerPlay /> PLAY
      </button>
      <button
        onClick={() => {
          props.sendButtonOnClick();
        }}
        className="inline-flex"
      >
        <TbBrandOpenai /> SEND
      </button>
    </div>
  );
}

View File

@@ -0,0 +1,15 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react-swc";
// https://vitejs.dev/config/
export default defineConfig({
  plugins: [react()],
  server: {
    // Send a permissive CORS header with the server's responses.
    headers: {
      "Access-Control-Allow-Origin": '*',
    },
  },
  build: {
    // Write the build output into the backend folder.
    // NOTE(review): presumably the `dist` directory the FastAPI app mounts
    // as static files — confirm.
    outDir: '../backend/dist/',
  },
});

View File

@@ -1,112 +0,0 @@
import { useState } from "react";
import { TbMicrophone2, TbPlayerPlay, TbPlayerStop } from "react-icons/tb";
import "./App.css";
function Header() {
return (
<header className="header p-3">
<div className="title text-3xl font-extrabold">
Speach to Speech AI example
</div>
</header>
);
}
let audioBlobs = [];
let streamBeingCaptured = null;
let mediaRecorder = null;
function get_mic() {
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
console.log("getUserMedia supported.");
return navigator.mediaDevices.getUserMedia({ audio: true });
} else {
console.log("getUserMedia not supported on your browser!");
}
}
function startRecord() {
get_mic().then((stream) => {
streamBeingCaptured = stream;
mediaRecorder = new MediaRecorder(stream);
console.log("Starting Recording");
mediaRecorder.addEventListener("dataavailable", (event) => {
audioBlobs.push(event.data);
});
mediaRecorder.start()
});
}
function stopRecord() {
mediaRecorder.stop();
streamBeingCaptured.getTracks()
.forEach((track) => track.stop());
mediaRecorder = null;
streamBeingCaptured = null;
console.log("Starting Recording");
console.log(audioBlobs);
}
function playRecord() {
const audioBlob = new Blob(audioBlobs, { type: "audio/webm" });
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
audio.play();
}
function Feed() {
return (
<div className="feed grow self-center content-center w-5/6 max-w-screen-lg px-6 py-3">
chat history goes here
</div>
);
}
function Controls() {
const [recordState, setRecordState] = useState(false);
function toggleRecord() {
if (recordState == false) {
startRecord();
setRecordState(true);
} else {
stopRecord();
setRecordState(false);
}
}
return (
<div className="controls self-center flex justify-evenly p-5 text-5xl border-2 border-b-0 w-1/2 max-w-screen-sm min-w-fit">
<button
onClick={() => toggleRecord()}
className={"inline-flex " + (recordState ? "text-red-500" : "")}
>
{recordState ? <TbPlayerStop /> : <TbMicrophone2 />}
{recordState ? "STOP" : "REC"}
</button>
<button
onClick={() => playRecord()}
className="inline-flex text-green-500"
>
<TbPlayerPlay /> PLAY
</button>
</div>
);
}
function App() {
return (
<>
<div className="h-screen center flex flex-col">
<div className="w-full max-w-screen-xl self-center">
<Header />
<hr className="mx-3 border-t-4" />
</div>
<Feed />
<Controls />
</div>
</>
);
}
export default App;

View File

@@ -1,7 +0,0 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react-swc'
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
})