Compare commits
9 Commits
64bb9f9db3
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 8aa7bd2e99 | |||
| 3c0a9b150b | |||
| 5cc002a110 | |||
| 7562778f18 | |||
| 4fd825e1ae | |||
| 7acdbb3136 | |||
| f1c2108bc7 | |||
| 056e1067f4 | |||
| 1916185f19 |
25
.gitignore
vendored
25
.gitignore
vendored
@@ -3,3 +3,28 @@
|
||||
**/audio
|
||||
*.mp3
|
||||
*.webm
|
||||
.env
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
|
||||
12
README.md
12
README.md
@@ -1,2 +1,14 @@
|
||||
# ai_sandbox
|
||||
|
||||
A sandbox for learning about the current AI tool landscape
|
||||
|
||||
## Subprojects
|
||||
|
||||
### [Speech to Speech AI Assistant](./speech-speech/)
|
||||
AI assistant chat with speech recognition and text-to-speech (TTS) responses
|
||||
|
||||
Fullstack
|
||||
- Vite, TS, React frontend
|
||||
- FastAPI backend
|
||||
- OpenAI for LLM services
|
||||
|
||||
|
||||
24
speech-speech/.gitignore
vendored
24
speech-speech/.gitignore
vendored
@@ -1,24 +0,0 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
35
speech-speech/README.md
Normal file
35
speech-speech/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Speech to Speech AI Assistant
|
||||
AI assistant chat with speech recognition and text-to-speech (TTS) responses
|
||||
|
||||
Fullstack
|
||||
- Vite, TS, React frontend
|
||||
- FastAPI backend
|
||||
- OpenAI for LLM services
|
||||
|
||||
## Requirements
|
||||
- python3
|
||||
- npm
|
||||
- OpenAI API token
|
||||
|
||||
## Setup
|
||||
```
|
||||
cd frontend
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
cd ../backend
|
||||
# optionally setup virtual environment of your choice
|
||||
python3 -m pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Running
|
||||
example `backend/.env`
|
||||
```
|
||||
OPENAI_API_KEY=<apikey>
|
||||
```
|
||||
|
||||
```
|
||||
cd backend
|
||||
source .env
|
||||
uvicorn --port 8080 api:app
|
||||
```
|
||||
@@ -1 +0,0 @@
|
||||
OPENAI_API_KEY=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
|
||||
@@ -1,8 +1,9 @@
|
||||
from openai import OpenAI
|
||||
from fastapi import FastAPI, File, Response, Request
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
@@ -26,22 +27,20 @@ class Conversation(BaseModel):
|
||||
|
||||
|
||||
@app.post("/get-text")
|
||||
def stt(audio: bytes = File()):
|
||||
with open("audio.webm", "wb+") as f:
|
||||
f.write(audio)
|
||||
async def stt(audio: bytes = File()):
|
||||
with BytesIO(audio) as f:
|
||||
f.name = "audio.mp3"
|
||||
transcript = openAI_clinet.audio.transcriptions.create(
|
||||
model="whisper-1",
|
||||
file=f,
|
||||
response_format="text",
|
||||
)
|
||||
)
|
||||
data = {"len": len(audio), "user-transcript": transcript}
|
||||
return data
|
||||
|
||||
|
||||
@app.post("/conversation")
|
||||
async def get_next_response(request: Request):
|
||||
# role = "test"
|
||||
# res_msg = "temp test response"
|
||||
messages = await request.json()
|
||||
res = openAI_clinet.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
@@ -57,10 +56,11 @@ async def get_next_response(request: Request):
|
||||
@app.get("/speak")
|
||||
def tts(text: str):
|
||||
res = openAI_clinet.audio.speech.create(
|
||||
model="tts-1",
|
||||
voice="nova",
|
||||
input=text,
|
||||
response_format='mp3'
|
||||
model="tts-1", voice="nova", input=text, response_format="mp3"
|
||||
)
|
||||
# this works for now but I need to find a way to stream this to response
|
||||
return Response(content=res.content, media_type="audio/mp3")
|
||||
|
||||
|
||||
# if this is above other routes it will try and serve files instead of matching
|
||||
# the intended route
|
||||
app.mount("/", StaticFiles(directory="dist", html=True), name="static")
|
||||
|
||||
@@ -2,6 +2,7 @@ import { useState } from "react";
|
||||
import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
|
||||
import "./App.css";
|
||||
|
||||
let userAudio: Array<Blob> = [];
|
||||
let audioBlobs: Array<Blob> = [];
|
||||
let streamBeingCaptured: MediaStream | null = null;
|
||||
let mediaRecorder: MediaRecorder | null = null;
|
||||
@@ -51,14 +52,6 @@ function playRecord() {
|
||||
audio.play();
|
||||
}
|
||||
|
||||
function playMsg(msg: ChatMsg) {
|
||||
const audio = new Audio(
|
||||
"http://100.82.51.22:8001/speak?" +
|
||||
new URLSearchParams({ text: msg.content }),
|
||||
);
|
||||
console.log("loading audio and playing?");
|
||||
audio.play();
|
||||
}
|
||||
function App() {
|
||||
const [recordState, setRecordState] = useState(false);
|
||||
const [chatState, setChatState] = useState([{
|
||||
@@ -77,18 +70,24 @@ function App() {
|
||||
}
|
||||
|
||||
function sendAudio() {
|
||||
var formData = new FormData();
|
||||
formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" }));
|
||||
fetch("http://100.82.51.22:8001/get-text", {
|
||||
let formData = new FormData();
|
||||
let audio = new Blob(audioBlobs, { type: "audio/webm" });
|
||||
userAudio.push(audio);
|
||||
formData.append("audio", audio);
|
||||
fetch("/get-text", {
|
||||
"method": "POST",
|
||||
"body": formData,
|
||||
}).then((res) => res.json())
|
||||
.then((res) => {
|
||||
setChatState((curState: Array<ChatMsg>) => [
|
||||
...curState,
|
||||
{ "role": "user", "content": res["user-transcript"] },
|
||||
{
|
||||
"role": "user",
|
||||
"content": res["user-transcript"],
|
||||
"audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
|
||||
},
|
||||
]);
|
||||
fetch("http://100.82.51.22:8001/conversation", {
|
||||
fetch("/conversation", {
|
||||
"method": "POST",
|
||||
"body": JSON.stringify([...chatState, {
|
||||
"role": "user",
|
||||
@@ -98,9 +97,10 @@ function App() {
|
||||
.then((res) => {
|
||||
setChatState((
|
||||
curState: Array<ChatMsg>,
|
||||
) => [...curState, res]);
|
||||
console.log("attempting to play result");
|
||||
playMsg(res);
|
||||
) => [...curState, {
|
||||
...res,
|
||||
"audio": "/speak?" + new URLSearchParams({ text: res.content }),
|
||||
}]);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -6,10 +6,10 @@ import {
|
||||
TbPlayerStop,
|
||||
} from "react-icons/tb";
|
||||
|
||||
|
||||
export type ChatMsg = {
|
||||
role: string;
|
||||
content: string;
|
||||
audio?: string;
|
||||
};
|
||||
|
||||
export function Header() {
|
||||
@@ -59,6 +59,11 @@ export function Msg(props: { msg: ChatMsg }) {
|
||||
<span className="ml-8">
|
||||
{props.msg.content}
|
||||
</span>
|
||||
<audio
|
||||
controls
|
||||
autoPlay={props.msg.role == "assistant"}
|
||||
src={props.msg.audio}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -9,4 +9,7 @@ export default defineConfig({
|
||||
"Access-Control-Allow-Origin": '*',
|
||||
},
|
||||
},
|
||||
build: {
|
||||
outDir: '../backend/dist/',
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user