import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment

# Load the Whisper model (use "small" or "medium" for better accuracy, or "tiny" if "base" is too slow)
model = whisper.load_model("base")

# Set GROQ_API_KEY in your environment (e.g. `export GROQ_API_KEY=...`) instead of hardcoding the key in source
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ ERROR: Groq API key is missing! Set it in your environment.")

# Initialize the Groq client with the API key
client = Groq(api_key=GROQ_API_KEY)


# Transcribe an audio file using Whisper
def transcribe_audio(file_path):
    try:
        print(f"📂 Processing file: {file_path}")

        # Convert the upload to WAV so Whisper always receives a format it can read
        audio = AudioSegment.from_file(file_path)
        converted_path = "converted.wav"
        audio.export(converted_path, format="wav")

        # Run Whisper transcription (fp16=False forces FP32, which is required on CPU)
        result = model.transcribe(converted_path, fp16=False)
        return result["text"]
    except Exception as e:
        return f"❌ ERROR in Transcription: {str(e)}"


# Send the transcribed text to the Groq LLM and return its reply
def chat_with_groq(text):
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama-3.3-70b-versatile",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"❌ ERROR in LLM Interaction: {str(e)}"


# Convert the LLM reply to speech with gTTS
def text_to_speech(text):
    try:
        tts = gTTS(text=text, lang="en")
        filename = "output_audio.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        return f"❌ ERROR in TTS: {str(e)}"


# Main chatbot pipeline: speech-to-text -> LLM -> text-to-speech
def voice_chatbot(audio_file):
    if not audio_file:
        return "❌ Please upload an audio file!", None

    # Speech-to-text
    text = transcribe_audio(audio_file)
    if "ERROR" in text:
        return text, None  # Propagate the error message

    # LLM response
    response_text = chat_with_groq(text)
    if "ERROR" in response_text:
        return response_text, None  # Propagate the error message

    # Text-to-speech
    response_audio = text_to_speech(response_text)
    if "ERROR" in response_audio:
        return response_audio, None  # Propagate the error message

    return response_text, response_audio


# Gradio UI: upload an audio file, get back the AI's text reply and spoken audio
iface = gr.Interface(
    fn=voice_chatbot,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=["text", "audio"],
    title="🎤 Real-Time Voice Chatbot",
    description="Upload an audio file to transcribe and chat with AI.",
)

# Launch the Gradio app
iface.launch()
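
# ---------------------------------------------------------------------------
# Setup notes (a sketch, assuming a standard Python 3 environment; adjust the
# commands and package versions to your own setup):
#
#   pip install openai-whisper groq gTTS gradio pydub
#
# Both Whisper and pydub rely on ffmpeg to decode audio, so ffmpeg must be
# installed and on your PATH. Export your key before launching, e.g.:
#
#   export GROQ_API_KEY=...
#   python app.py
#
# The filename "app.py" above is illustrative; use whatever file holds this script.
# ---------------------------------------------------------------------------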