|
|
|
|
|
import os
import tempfile

import gradio as gr
import scipy.io.wavfile as wav
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
|
|
|
|
|
|
|
|
model = whisper.load_model("base") |
|
|
|
|
|
|
|
|
os.environ["GROQ_API_KEY"] = "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" |
|
|
|
|
|
|
|
|
GROQ_API_KEY = os.getenv("GROQ_API_KEY") |
|
|
if not GROQ_API_KEY: |
|
|
raise ValueError("β ERROR: Groq API key is missing! Set it in your environment.") |
|
|
|
|
|
|
|
|
client = Groq(api_key=GROQ_API_KEY) |
|
|
|
|
|
def transcribe_audio(file_path): |
|
|
try: |
|
|
print(f"π Processing File: {file_path}") |
|
|
|
|
|
|
|
|
audio = AudioSegment.from_file(file_path) |
|
|
converted_path = "converted.wav" |
|
|
audio.export(converted_path, format="wav") |
|
|
|
|
|
|
|
|
result = model.transcribe(converted_path, fp16=False) |
|
|
return result["text"] |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in Transcription: {str(e)}" |
|
|
|
|
|
|
|
|
def chat_with_groq(text): |
|
|
try: |
|
|
chat_completion = client.chat.completions.create( |
|
|
messages=[{"role": "user", "content": text}], |
|
|
model="llama-3.3-70b-versatile" |
|
|
) |
|
|
return chat_completion.choices[0].message.content |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in LLM Interaction: {str(e)}" |
|
|
|
|
|
|
|
|
def text_to_speech(text): |
|
|
try: |
|
|
tts = gTTS(text=text, lang="en") |
|
|
filename = "output_audio.mp3" |
|
|
tts.save(filename) |
|
|
return filename |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in TTS: {str(e)}" |
|
|
|
|
|
|
|
|
def voice_chatbot(audio_file): |
|
|
if not audio_file: |
|
|
return "β Please upload an audio file!", None |
|
|
|
|
|
|
|
|
text = transcribe_audio(audio_file) |
|
|
if "ERROR" in text: |
|
|
return text, None |
|
|
|
|
|
|
|
|
response_text = chat_with_groq(text) |
|
|
if "ERROR" in response_text: |
|
|
return response_text, None |
|
|
|
|
|
|
|
|
response_audio = text_to_speech(response_text) |
|
|
if "ERROR" in response_audio: |
|
|
return response_audio, None |
|
|
|
|
|
return response_text, response_audio |
|
|
|
|
|
|
|
|
iface = gr.Interface( |
|
|
fn=voice_chatbot, |
|
|
inputs=gr.Audio(type="filepath", label="Upload an Audio File"), |
|
|
outputs=["text", "audio"], |
|
|
title="π€ Real-Time Voice Chatbot", |
|
|
description="Upload an audio file to transcribe and chat with AI.", |
|
|
) |
|
|
|
|
|
|
|
|
iface.launch() |