Compare commits
9 Commits
64bb9f9db3
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 8aa7bd2e99 | |||
| 3c0a9b150b | |||
| 5cc002a110 | |||
| 7562778f18 | |||
| 4fd825e1ae | |||
| 7acdbb3136 | |||
| f1c2108bc7 | |||
| 056e1067f4 | |||
| 1916185f19 |
25
.gitignore
vendored
25
.gitignore
vendored
@@ -3,3 +3,28 @@
|
|||||||
**/audio
|
**/audio
|
||||||
*.mp3
|
*.mp3
|
||||||
*.webm
|
*.webm
|
||||||
|
.env
|
||||||
|
# Logs
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
pnpm-debug.log*
|
||||||
|
lerna-debug.log*
|
||||||
|
|
||||||
|
node_modules
|
||||||
|
dist
|
||||||
|
dist-ssr
|
||||||
|
*.local
|
||||||
|
|
||||||
|
# Editor directories and files
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/extensions.json
|
||||||
|
.idea
|
||||||
|
.DS_Store
|
||||||
|
*.suo
|
||||||
|
*.ntvs*
|
||||||
|
*.njsproj
|
||||||
|
*.sln
|
||||||
|
*.sw?
|
||||||
|
|||||||
12
README.md
12
README.md
@@ -1,2 +1,14 @@
|
|||||||
# ai_sandbox
|
# ai_sandbox
|
||||||
|
|
||||||
|
A sandbox for learning about the current AI tool landscape.
|
||||||
|
|
||||||
|
## Subprojects
|
||||||
|
|
||||||
|
### [Speech to Speech AI Assistant](./speech-speech/)
|
||||||
|
AI assistant chat with speech recognition and text-to-speech (TTS) responses
|
||||||
|
|
||||||
|
Fullstack
|
||||||
|
- Vite, TS, React frontend
|
||||||
|
- FastAPI backend
|
||||||
|
- OpenAI for LLM services
|
||||||
|
|
||||||
|
|||||||
24
speech-speech/.gitignore
vendored
24
speech-speech/.gitignore
vendored
@@ -1,24 +0,0 @@
|
|||||||
# Logs
|
|
||||||
logs
|
|
||||||
*.log
|
|
||||||
npm-debug.log*
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
pnpm-debug.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
|
|
||||||
node_modules
|
|
||||||
dist
|
|
||||||
dist-ssr
|
|
||||||
*.local
|
|
||||||
|
|
||||||
# Editor directories and files
|
|
||||||
.vscode/*
|
|
||||||
!.vscode/extensions.json
|
|
||||||
.idea
|
|
||||||
.DS_Store
|
|
||||||
*.suo
|
|
||||||
*.ntvs*
|
|
||||||
*.njsproj
|
|
||||||
*.sln
|
|
||||||
*.sw?
|
|
||||||
35
speech-speech/README.md
Normal file
35
speech-speech/README.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Speech to Speech AI Assistant
|
||||||
|
AI assistant chat with speech recognition and text-to-speech (TTS) responses
|
||||||
|
|
||||||
|
Fullstack
|
||||||
|
- Vite, TS, React frontend
|
||||||
|
- FastAPI backend
|
||||||
|
- OpenAI for LLM services
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
- python3
|
||||||
|
- npm
|
||||||
|
- OpenAI API token
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
```
|
||||||
|
cd frontend
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
|
||||||
|
cd ../backend
|
||||||
|
# optionally set up a virtual environment of your choice
|
||||||
|
python3 -m pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running
|
||||||
|
Example `backend/.env`:
|
||||||
|
```
|
||||||
|
OPENAI_API_KEY=<apikey>
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
cd backend
|
||||||
|
source .env
|
||||||
|
uvicorn --port 8080 api:app
|
||||||
|
```
|
||||||
@@ -1 +0,0 @@
|
|||||||
OPENAI_API_KEY=<redacted>
|
|
||||||
@@ -1,8 +1,9 @@
|
|||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from fastapi import FastAPI, File, Response, Request
|
from fastapi import FastAPI, File, Response, Request
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import FileResponse
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
@@ -26,22 +27,20 @@ class Conversation(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/get-text")
|
@app.post("/get-text")
|
||||||
def stt(audio: bytes = File()):
|
async def stt(audio: bytes = File()):
|
||||||
with open("audio.webm", "wb+") as f:
|
with BytesIO(audio) as f:
|
||||||
f.write(audio)
|
f.name = "audio.mp3"
|
||||||
transcript = openAI_clinet.audio.transcriptions.create(
|
transcript = openAI_clinet.audio.transcriptions.create(
|
||||||
model="whisper-1",
|
model="whisper-1",
|
||||||
file=f,
|
file=f,
|
||||||
response_format="text",
|
response_format="text",
|
||||||
)
|
)
|
||||||
data = {"len": len(audio), "user-transcript": transcript}
|
data = {"len": len(audio), "user-transcript": transcript}
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@app.post("/conversation")
|
@app.post("/conversation")
|
||||||
async def get_next_response(request: Request):
|
async def get_next_response(request: Request):
|
||||||
# role = "test"
|
|
||||||
# res_msg = "temp test response"
|
|
||||||
messages = await request.json()
|
messages = await request.json()
|
||||||
res = openAI_clinet.chat.completions.create(
|
res = openAI_clinet.chat.completions.create(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
@@ -57,10 +56,11 @@ async def get_next_response(request: Request):
|
|||||||
@app.get("/speak")
|
@app.get("/speak")
|
||||||
def tts(text: str):
|
def tts(text: str):
|
||||||
res = openAI_clinet.audio.speech.create(
|
res = openAI_clinet.audio.speech.create(
|
||||||
model="tts-1",
|
model="tts-1", voice="nova", input=text, response_format="mp3"
|
||||||
voice="nova",
|
|
||||||
input=text,
|
|
||||||
response_format='mp3'
|
|
||||||
)
|
)
|
||||||
# this works for now but I need to find a way to stream this to response
|
|
||||||
return Response(content=res.content, media_type="audio/mp3")
|
return Response(content=res.content, media_type="audio/mp3")
|
||||||
|
|
||||||
|
|
||||||
|
# mount this last: if it is above the other routes it will try to serve files instead of matching
|
||||||
|
# the intended route
|
||||||
|
app.mount("/", StaticFiles(directory="dist", html=True), name="static")
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import { useState } from "react";
|
|||||||
import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
|
import { ChatMsg, Controls, Feed, Header } from "./components.tsx";
|
||||||
import "./App.css";
|
import "./App.css";
|
||||||
|
|
||||||
|
let userAudio: Array<Blob> = [];
|
||||||
let audioBlobs: Array<Blob> = [];
|
let audioBlobs: Array<Blob> = [];
|
||||||
let streamBeingCaptured: MediaStream | null = null;
|
let streamBeingCaptured: MediaStream | null = null;
|
||||||
let mediaRecorder: MediaRecorder | null = null;
|
let mediaRecorder: MediaRecorder | null = null;
|
||||||
@@ -51,14 +52,6 @@ function playRecord() {
|
|||||||
audio.play();
|
audio.play();
|
||||||
}
|
}
|
||||||
|
|
||||||
function playMsg(msg: ChatMsg) {
|
|
||||||
const audio = new Audio(
|
|
||||||
"http://100.82.51.22:8001/speak?" +
|
|
||||||
new URLSearchParams({ text: msg.content }),
|
|
||||||
);
|
|
||||||
console.log("loading audio and playing?");
|
|
||||||
audio.play();
|
|
||||||
}
|
|
||||||
function App() {
|
function App() {
|
||||||
const [recordState, setRecordState] = useState(false);
|
const [recordState, setRecordState] = useState(false);
|
||||||
const [chatState, setChatState] = useState([{
|
const [chatState, setChatState] = useState([{
|
||||||
@@ -77,18 +70,24 @@ function App() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function sendAudio() {
|
function sendAudio() {
|
||||||
var formData = new FormData();
|
let formData = new FormData();
|
||||||
formData.append("audio", new Blob(audioBlobs, { type: "audio/webm" }));
|
let audio = new Blob(audioBlobs, { type: "audio/webm" });
|
||||||
fetch("http://100.82.51.22:8001/get-text", {
|
userAudio.push(audio);
|
||||||
|
formData.append("audio", audio);
|
||||||
|
fetch("/get-text", {
|
||||||
"method": "POST",
|
"method": "POST",
|
||||||
"body": formData,
|
"body": formData,
|
||||||
}).then((res) => res.json())
|
}).then((res) => res.json())
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
setChatState((curState: Array<ChatMsg>) => [
|
setChatState((curState: Array<ChatMsg>) => [
|
||||||
...curState,
|
...curState,
|
||||||
{ "role": "user", "content": res["user-transcript"] },
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": res["user-transcript"],
|
||||||
|
"audio": URL.createObjectURL(userAudio[userAudio.length - 1]),
|
||||||
|
},
|
||||||
]);
|
]);
|
||||||
fetch("http://100.82.51.22:8001/conversation", {
|
fetch("/conversation", {
|
||||||
"method": "POST",
|
"method": "POST",
|
||||||
"body": JSON.stringify([...chatState, {
|
"body": JSON.stringify([...chatState, {
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@@ -98,9 +97,10 @@ function App() {
|
|||||||
.then((res) => {
|
.then((res) => {
|
||||||
setChatState((
|
setChatState((
|
||||||
curState: Array<ChatMsg>,
|
curState: Array<ChatMsg>,
|
||||||
) => [...curState, res]);
|
) => [...curState, {
|
||||||
console.log("attempting to play result");
|
...res,
|
||||||
playMsg(res);
|
"audio": "/speak?" + new URLSearchParams({ text: res.content }),
|
||||||
|
}]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ import {
|
|||||||
TbPlayerStop,
|
TbPlayerStop,
|
||||||
} from "react-icons/tb";
|
} from "react-icons/tb";
|
||||||
|
|
||||||
|
|
||||||
export type ChatMsg = {
|
export type ChatMsg = {
|
||||||
role: string;
|
role: string;
|
||||||
content: string;
|
content: string;
|
||||||
|
audio?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export function Header() {
|
export function Header() {
|
||||||
@@ -59,6 +59,11 @@ export function Msg(props: { msg: ChatMsg }) {
|
|||||||
<span className="ml-8">
|
<span className="ml-8">
|
||||||
{props.msg.content}
|
{props.msg.content}
|
||||||
</span>
|
</span>
|
||||||
|
<audio
|
||||||
|
controls
|
||||||
|
autoPlay={props.msg.role == "assistant"}
|
||||||
|
src={props.msg.audio}
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,4 +9,7 @@ export default defineConfig({
|
|||||||
"Access-Control-Allow-Origin": '*',
|
"Access-Control-Allow-Origin": '*',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
build: {
|
||||||
|
outDir: '../backend/dist/',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user