Spaces:
Sleeping
Sleeping
Update Dockerfile and requirements; add torchvision dependency and enhance audio visualization handling in Streamlit app
Browse files
- Dockerfile +1 -1
- requirements.txt +1 -0
- src/streamlit_app.py +60 -27
Dockerfile
CHANGED
|
@@ -35,7 +35,7 @@ COPY requirements.txt .
|
|
| 35 |
|
| 36 |
# Install Python dependencies with specific order for compatibility
|
| 37 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 38 |
-
pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 && \
|
| 39 |
pip install --no-cache-dir -r requirements.txt && \
|
| 40 |
pip install --no-cache-dir git+https://github.com/speechbrain/[email protected]
|
| 41 |
|
|
|
|
| 35 |
|
| 36 |
# Install Python dependencies with specific order for compatibility
|
| 37 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 38 |
+
pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 && \
|
| 39 |
pip install --no-cache-dir -r requirements.txt && \
|
| 40 |
pip install --no-cache-dir git+https://github.com/speechbrain/[email protected]
|
| 41 |
|
requirements.txt
CHANGED
|
@@ -7,6 +7,7 @@ beautifulsoup4==4.12.2
|
|
| 7 |
speechbrain==0.5.14
|
| 8 |
torch==2.0.1
|
| 9 |
torchaudio==2.0.2
|
|
|
|
| 10 |
# Pin transformers to version that has AutoProcessor
|
| 11 |
transformers==4.31.0
|
| 12 |
librosa==0.10.1
|
|
|
|
| 7 |
speechbrain==0.5.14
|
| 8 |
torch==2.0.1
|
| 9 |
torchaudio==2.0.2
|
| 10 |
+
torchvision==0.15.2
|
| 11 |
# Pin transformers to version that has AutoProcessor
|
| 12 |
transformers==4.31.0
|
| 13 |
librosa==0.10.1
|
src/streamlit_app.py
CHANGED
|
@@ -442,8 +442,7 @@ class AccentDetector:
|
|
| 442 |
|
| 443 |
# Generate explanation
|
| 444 |
explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
|
| 445 |
-
|
| 446 |
-
# Create visualization of the audio waveform
|
| 447 |
try:
|
| 448 |
y, sr = librosa.load(audio_path, sr=None)
|
| 449 |
fig, ax = plt.subplots(figsize=(10, 2))
|
|
@@ -453,6 +452,25 @@ class AccentDetector:
|
|
| 453 |
ax.set_title('Audio Waveform')
|
| 454 |
plt.tight_layout()
|
| 455 |
audio_viz = fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
except Exception as e:
|
| 457 |
st.warning(f"Could not generate audio visualization: {str(e)}")
|
| 458 |
audio_viz = None
|
|
@@ -467,21 +485,34 @@ class AccentDetector:
|
|
| 467 |
"audio_viz": audio_viz
|
| 468 |
}
|
| 469 |
|
| 470 |
-
def process_uploaded_audio(
|
| 471 |
-
"""Process uploaded audio file
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
try:
|
| 473 |
# Create a unique filename based on timestamp
|
| 474 |
timestamp = str(int(time.time()))
|
| 475 |
-
file_extension = os.path.splitext(uploaded_file.name)[1].lower()
|
| 476 |
-
|
| 477 |
-
# Create an uploads directory if it doesn't exist
|
| 478 |
-
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
| 479 |
-
os.makedirs(uploads_dir, exist_ok=True)
|
| 480 |
|
| 481 |
-
#
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
# For MP4 files, extract the audio using ffmpeg
|
| 486 |
if file_extension == ".mp4":
|
| 487 |
st.info("Extracting audio from video file...")
|
|
@@ -660,10 +691,13 @@ with tab1:
|
|
| 660 |
# Show explanation in a box
|
| 661 |
st.markdown("### Expert Analysis")
|
| 662 |
st.info(results['explanation'])
|
| 663 |
-
|
| 664 |
-
with col2:
|
| 665 |
if results['audio_viz']:
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
# Show audio playback
|
| 669 |
st.audio(audio_path)
|
|
@@ -731,19 +765,15 @@ with tab2:
|
|
| 731 |
# Create an uploads directory if it doesn't exist
|
| 732 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
| 733 |
os.makedirs(uploads_dir, exist_ok=True)
|
| 734 |
-
|
| 735 |
-
# Save the file first to avoid streaming it multiple times
|
| 736 |
temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
|
| 737 |
with open(temp_file_path, "wb") as f:
|
| 738 |
f.write(uploaded_file.getbuffer())
|
| 739 |
|
| 740 |
progress_bar.progress(50, text="Analyzing audio...")
|
| 741 |
|
| 742 |
-
#
|
| 743 |
-
|
| 744 |
-
# Create a new UploadedFile object from the saved file
|
| 745 |
-
file_content = f.read()
|
| 746 |
-
results = process_uploaded_audio(uploaded_file)
|
| 747 |
|
| 748 |
progress_bar.progress(100, text="Analysis complete!")
|
| 749 |
# Display results
|
|
@@ -760,11 +790,14 @@ with tab2:
|
|
| 760 |
# Show explanation in a box
|
| 761 |
st.markdown("### Expert Analysis")
|
| 762 |
st.info(results['explanation'])
|
| 763 |
-
|
| 764 |
-
with col2:
|
| 765 |
if results['audio_viz']:
|
| 766 |
-
|
| 767 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 768 |
except subprocess.CalledProcessError as e:
|
| 769 |
st.error("Error processing audio file")
|
| 770 |
st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")
|
|
|
|
| 442 |
|
| 443 |
# Generate explanation
|
| 444 |
explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
|
| 445 |
+
# Create visualization of the audio waveform
|
|
|
|
| 446 |
try:
|
| 447 |
y, sr = librosa.load(audio_path, sr=None)
|
| 448 |
fig, ax = plt.subplots(figsize=(10, 2))
|
|
|
|
| 452 |
ax.set_title('Audio Waveform')
|
| 453 |
plt.tight_layout()
|
| 454 |
audio_viz = fig
|
| 455 |
+
|
| 456 |
+
# Make sure the figure can be saved
|
| 457 |
+
try:
|
| 458 |
+
# Test if the figure can be saved
|
| 459 |
+
import tempfile
|
| 460 |
+
with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
|
| 461 |
+
plt.savefig(tmp.name)
|
| 462 |
+
except Exception as viz_save_error:
|
| 463 |
+
st.warning(f"Could not save visualization: {str(viz_save_error)}. Using simpler visualization.")
|
| 464 |
+
# Create a simple alternative visualization
|
| 465 |
+
import numpy as np
|
| 466 |
+
# Downsample for performance
|
| 467 |
+
sample_rate = max(1, len(y) // 1000)
|
| 468 |
+
y_downsampled = y[::sample_rate]
|
| 469 |
+
fig2, ax2 = plt.subplots(figsize=(8, 2))
|
| 470 |
+
ax2.plot(np.arange(len(y_downsampled)), y_downsampled)
|
| 471 |
+
ax2.set_title("Audio Waveform (simplified)")
|
| 472 |
+
audio_viz = fig2
|
| 473 |
+
|
| 474 |
except Exception as e:
|
| 475 |
st.warning(f"Could not generate audio visualization: {str(e)}")
|
| 476 |
audio_viz = None
|
|
|
|
| 485 |
"audio_viz": audio_viz
|
| 486 |
}
|
| 487 |
|
| 488 |
+
def process_uploaded_audio(file_input):
|
| 489 |
+
"""Process uploaded audio file
|
| 490 |
+
|
| 491 |
+
Args:
|
| 492 |
+
file_input: Either a StreamlitUploadedFile object or a string path to a file
|
| 493 |
+
"""
|
| 494 |
try:
|
| 495 |
# Create a unique filename based on timestamp
|
| 496 |
timestamp = str(int(time.time()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
|
| 498 |
+
# Handle different input types
|
| 499 |
+
if isinstance(file_input, str):
|
| 500 |
+
# If it's already a file path
|
| 501 |
+
temp_input_path = file_input
|
| 502 |
+
file_extension = os.path.splitext(temp_input_path)[1].lower()
|
| 503 |
+
st.info(f"Processing from saved file: {os.path.basename(temp_input_path)}")
|
| 504 |
+
else:
|
| 505 |
+
# If it's a StreamlitUploadedFile
|
| 506 |
+
file_extension = os.path.splitext(file_input.name)[1].lower()
|
| 507 |
+
|
| 508 |
+
# Create an uploads directory if it doesn't exist
|
| 509 |
+
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
| 510 |
+
os.makedirs(uploads_dir, exist_ok=True)
|
| 511 |
+
|
| 512 |
+
# Write the uploaded file to disk with proper extension in the uploads directory
|
| 513 |
+
temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
|
| 514 |
+
with open(temp_input_path, "wb") as f:
|
| 515 |
+
f.write(file_input.getbuffer())
|
| 516 |
# For MP4 files, extract the audio using ffmpeg
|
| 517 |
if file_extension == ".mp4":
|
| 518 |
st.info("Extracting audio from video file...")
|
|
|
|
| 691 |
# Show explanation in a box
|
| 692 |
st.markdown("### Expert Analysis")
|
| 693 |
st.info(results['explanation'])
|
| 694 |
+
with col2:
|
|
|
|
| 695 |
if results['audio_viz']:
|
| 696 |
+
try:
|
| 697 |
+
st.pyplot(results['audio_viz'])
|
| 698 |
+
except Exception as viz_error:
|
| 699 |
+
st.warning("Could not display visualization due to torchvision issue.")
|
| 700 |
+
st.info("Audio analysis was successful even though visualization failed.")
|
| 701 |
|
| 702 |
# Show audio playback
|
| 703 |
st.audio(audio_path)
|
|
|
|
| 765 |
# Create an uploads directory if it doesn't exist
|
| 766 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
| 767 |
os.makedirs(uploads_dir, exist_ok=True)
|
| 768 |
+
# Save the file first to avoid streaming it multiple times
|
|
|
|
| 769 |
temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
|
| 770 |
with open(temp_file_path, "wb") as f:
|
| 771 |
f.write(uploaded_file.getbuffer())
|
| 772 |
|
| 773 |
progress_bar.progress(50, text="Analyzing audio...")
|
| 774 |
|
| 775 |
+
# Process using the saved file path directly
|
| 776 |
+
results = process_uploaded_audio(temp_file_path)
|
|
|
|
|
|
|
|
|
|
| 777 |
|
| 778 |
progress_bar.progress(100, text="Analysis complete!")
|
| 779 |
# Display results
|
|
|
|
| 790 |
# Show explanation in a box
|
| 791 |
st.markdown("### Expert Analysis")
|
| 792 |
st.info(results['explanation'])
|
| 793 |
+
with col2:
|
|
|
|
| 794 |
if results['audio_viz']:
|
| 795 |
+
try:
|
| 796 |
+
st.pyplot(results['audio_viz'])
|
| 797 |
+
except Exception as viz_error:
|
| 798 |
+
st.warning("Could not display visualization due to torchvision issue.")
|
| 799 |
+
st.info("Audio analysis was successful even though visualization failed.")
|
| 800 |
+
|
| 801 |
except subprocess.CalledProcessError as e:
|
| 802 |
st.error("Error processing audio file")
|
| 803 |
st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")
|