|
|
|
|
|
import os
import tempfile

import gradio as gr
import scipy.io.wavfile as wav
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
|
|
|
|
|
|
|
|
model = whisper.load_model("base") |
|
|
|
|
|
|
|
|
os.environ["GROQ_API_KEY"] = "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" |
|
|
|
|
|
|
|
|
GROQ_API_KEY = os.getenv("GROQ_API_KEY") |
|
|
if not GROQ_API_KEY: |
|
|
raise ValueError("β ERROR: Groq API key is missing! Set it in your environment.") |
|
|
|
|
|
|
|
|
client = Groq(api_key=GROQ_API_KEY) |
|
|
|
|
|
def transcribe_audio(file_path): |
|
|
try: |
|
|
print(f"π Processing File: {file_path}") |
|
|
|
|
|
|
|
|
audio = AudioSegment.from_file(file_path) |
|
|
converted_path = "converted.wav" |
|
|
audio.export(converted_path, format="wav") |
|
|
|
|
|
|
|
|
result = model.transcribe(converted_path, fp16=False) |
|
|
return result["text"] |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in Transcription: {str(e)}" |
|
|
|
|
|
|
|
|
def chat_with_groq(text): |
|
|
try: |
|
|
chat_completion = client.chat.completions.create( |
|
|
messages=[{"role": "user", "content": text}], |
|
|
model="llama-3.3-70b-versatile" |
|
|
) |
|
|
return chat_completion.choices[0].message.content |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in LLM Interaction: {str(e)}" |
|
|
|
|
|
|
|
|
def text_to_speech(text): |
|
|
try: |
|
|
tts = gTTS(text=text, lang="en") |
|
|
filename = "output_audio.mp3" |
|
|
tts.save(filename) |
|
|
return filename |
|
|
|
|
|
except Exception as e: |
|
|
return f"β ERROR in TTS: {str(e)}" |
|
|
|
|
|
|
|
|
def voice_chatbot(audio_file): |
|
|
if not audio_file: |
|
|
return "β Please upload an audio file!", None |
|
|
|
|
|
|
|
|
text = transcribe_audio(audio_file) |
|
|
if "ERROR" in text: |
|
|
return text, None |
|
|
|
|
|
|
|
|
response_text = chat_with_groq(text) |
|
|
if "ERROR" in response_text: |
|
|
return response_text, None |
|
|
|
|
|
|
|
|
response_audio = text_to_speech(response_text) |
|
|
if "ERROR" in response_audio: |
|
|
return response_audio, None |
|
|
|
|
|
return response_text, response_audio |
|
|
|
|
|
|
|
|
iface = gr.Interface( |
|
|
fn=voice_chatbot, |
|
|
inputs=gr.Audio(type="filepath", label="Upload an Audio File"), |
|
|
outputs=["text", "audio"], |
|
|
title="π€ Real-Time Voice Chatbot", |
|
|
description="Upload an audio file to transcribe and chat with AI.", |
|
|
) |
|
|
|
|
|
|
|
|
iface.launch() |