Spaces:

amirjamali
/

accent-detector

Sleeping

App Files Files Community

amirjamali committed on May 24

Commit

bb36a56

unverified ·

1 Parent(s): aa81f17

Update Dockerfile and requirements; add torchvision dependency and enhance audio visualization handling in Streamlit app

Browse files

Files changed (3) hide show

Dockerfile +1 -1
requirements.txt +1 -0
src/streamlit_app.py +60 -27

Dockerfile CHANGED Viewed

@@ -35,7 +35,7 @@ COPY requirements.txt .
 # Install Python dependencies with specific order for compatibility
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 && \
     pip install --no-cache-dir -r requirements.txt && \
     pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain@v0.5.14

 # Install Python dependencies with specific order for compatibility
 RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 && \
     pip install --no-cache-dir -r requirements.txt && \
     pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain@v0.5.14

requirements.txt CHANGED Viewed

@@ -7,6 +7,7 @@ beautifulsoup4==4.12.2
 speechbrain==0.5.14
 torch==2.0.1
 torchaudio==2.0.2
 # Pin transformers to version that has AutoProcessor
 transformers==4.31.0
 librosa==0.10.1

 speechbrain==0.5.14
 torch==2.0.1
 torchaudio==2.0.2
+torchvision==0.15.2
 # Pin transformers to version that has AutoProcessor
 transformers==4.31.0
 librosa==0.10.1

src/streamlit_app.py CHANGED Viewed

@@ -442,8 +442,7 @@ class AccentDetector:
         # Generate explanation
         explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
-        # Create visualization of the audio waveform
         try:
             y, sr = librosa.load(audio_path, sr=None)
             fig, ax = plt.subplots(figsize=(10, 2))
@@ -453,6 +452,25 @@ class AccentDetector:
             ax.set_title('Audio Waveform')
             plt.tight_layout()
             audio_viz = fig
         except Exception as e:
             st.warning(f"Could not generate audio visualization: {str(e)}")
             audio_viz = None
@@ -467,21 +485,34 @@ class AccentDetector:
             "audio_viz": audio_viz
         }
-def process_uploaded_audio(uploaded_file):
-    """Process uploaded audio file"""
     try:
         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
-        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
-        # Create an uploads directory if it doesn't exist
-        uploads_dir = os.path.join(os.getcwd(), "uploads")
-        os.makedirs(uploads_dir, exist_ok=True)
-        # Write the uploaded file to disk with proper extension in the uploads directory
-        temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
-        with open(temp_input_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
               # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
@@ -660,10 +691,13 @@ with tab1:
                             # Show explanation in a box
                             st.markdown("### Expert Analysis")
                             st.info(results['explanation'])
-                        with col2:
                             if results['audio_viz']:
-                                st.pyplot(results['audio_viz'])
                             # Show audio playback
                             st.audio(audio_path)
@@ -731,19 +765,15 @@ with tab2:
                         # Create an uploads directory if it doesn't exist
                         uploads_dir = os.path.join(os.getcwd(), "uploads")
                         os.makedirs(uploads_dir, exist_ok=True)
-                        # Save the file first to avoid streaming it multiple times
                         temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
                         with open(temp_file_path, "wb") as f:
                             f.write(uploaded_file.getbuffer())
                         progress_bar.progress(50, text="Analyzing audio...")
-                        # Now process from the saved file
-                        with open(temp_file_path, "rb") as f:
-                            # Create a new UploadedFile object from the saved file
-                            file_content = f.read()
-                            results = process_uploaded_audio(uploaded_file)
                         progress_bar.progress(100, text="Analysis complete!")
                         # Display results
@@ -760,11 +790,14 @@ with tab2:
                           # Show explanation in a box
                         st.markdown("### Expert Analysis")
                         st.info(results['explanation'])
-                    with col2:
                         if results['audio_viz']:
-                            st.pyplot(results['audio_viz'])
                 except subprocess.CalledProcessError as e:
                     st.error("Error processing audio file")
                     st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")

         # Generate explanation
         explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
+          # Create visualization of the audio waveform
         try:
             y, sr = librosa.load(audio_path, sr=None)
             fig, ax = plt.subplots(figsize=(10, 2))
             ax.set_title('Audio Waveform')
             plt.tight_layout()
             audio_viz = fig
+            # Make sure the figure can be saved
+            try:
+                # Test if the figure can be saved
+                import tempfile
+                with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
+                    plt.savefig(tmp.name)
+            except Exception as viz_save_error:
+                st.warning(f"Could not save visualization: {str(viz_save_error)}. Using simpler visualization.")
+                # Create a simple alternative visualization
+                import numpy as np
+                # Downsample for performance
+                sample_rate = max(1, len(y) // 1000)
+                y_downsampled = y[::sample_rate]
+                fig2, ax2 = plt.subplots(figsize=(8, 2))
+                ax2.plot(np.arange(len(y_downsampled)), y_downsampled)
+                ax2.set_title("Audio Waveform (simplified)")
+                audio_viz = fig2
         except Exception as e:
             st.warning(f"Could not generate audio visualization: {str(e)}")
             audio_viz = None
             "audio_viz": audio_viz
         }
+def process_uploaded_audio(file_input):
+    """Process uploaded audio file
+    Args:
+        file_input: Either a StreamlitUploadedFile object or a string path to a file
+    """
     try:
         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
+        # Handle different input types
+        if isinstance(file_input, str):
+            # If it's already a file path
+            temp_input_path = file_input
+            file_extension = os.path.splitext(temp_input_path)[1].lower()
+            st.info(f"Processing from saved file: {os.path.basename(temp_input_path)}")
+        else:
+            # If it's a StreamlitUploadedFile
+            file_extension = os.path.splitext(file_input.name)[1].lower()
+            # Create an uploads directory if it doesn't exist
+            uploads_dir = os.path.join(os.getcwd(), "uploads")
+            os.makedirs(uploads_dir, exist_ok=True)
+            # Write the uploaded file to disk with proper extension in the uploads directory
+            temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
+            with open(temp_input_path, "wb") as f:
+                f.write(file_input.getbuffer())
               # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
                             # Show explanation in a box
                             st.markdown("### Expert Analysis")
                             st.info(results['explanation'])
+                          with col2:
                             if results['audio_viz']:
+                                try:
+                                    st.pyplot(results['audio_viz'])
+                                except Exception as viz_error:
+                                    st.warning("Could not display visualization due to torchvision issue.")
+                                    st.info("Audio analysis was successful even though visualization failed.")
                             # Show audio playback
                             st.audio(audio_path)
                         # Create an uploads directory if it doesn't exist
                         uploads_dir = os.path.join(os.getcwd(), "uploads")
                         os.makedirs(uploads_dir, exist_ok=True)
+                          # Save the file first to avoid streaming it multiple times
                         temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
                         with open(temp_file_path, "wb") as f:
                             f.write(uploaded_file.getbuffer())
                         progress_bar.progress(50, text="Analyzing audio...")
+                        # Process using the saved file path directly
+                        results = process_uploaded_audio(temp_file_path)
                         progress_bar.progress(100, text="Analysis complete!")
                         # Display results
                           # Show explanation in a box
                         st.markdown("### Expert Analysis")
                         st.info(results['explanation'])
+                      with col2:
                         if results['audio_viz']:
+                            try:
+                                st.pyplot(results['audio_viz'])
+                            except Exception as viz_error:
+                                st.warning("Could not display visualization due to torchvision issue.")
+                                st.info("Audio analysis was successful even though visualization failed.")
                 except subprocess.CalledProcessError as e:
                     st.error("Error processing audio file")
                     st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")