Spaces:
Sleeping
Sleeping
aldan.creo
committed on
Commit
·
b99bb69
1
Parent(s):
3da5b44
First version
Browse files- .gitignore +5 -0
- README.md +2 -4
- app.py +87 -0
- requirements.txt +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.conda
|
| 2 |
+
__pycache__
|
| 3 |
+
*.pyc
|
| 4 |
+
.vscode
|
| 5 |
+
.DS_Store
|
README.md
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
---
|
| 2 |
title: GrAImmarian
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.3.0
|
| 8 |
app_file: app.py
|
| 9 |
-
pinned:
|
| 10 |
short_description: Utilizes ASR to check for filler words when public speaking
|
| 11 |
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: GrAImmarian
|
| 3 |
+
emoji: 🗣️
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.3.0
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
short_description: Utilizes ASR to check for filler words when public speaking
|
| 11 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
# Speech-recognition pipeline, built once at import time and reused by every
# transcribe() call.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def transcribe(state, words_list, new_chunk):
    """Transcribe the accumulated microphone audio and count filler words.

    Every call appends ``new_chunk`` to the audio kept in ``state`` and
    transcribes the *whole* accumulated stream again. The transcription of
    that stream is therefore already the full transcription, and the word
    counts are computed from it directly rather than being added on top of
    the counts of earlier calls (which would count early audio repeatedly).

    Args:
        state: Dict carried between calls with the keys ``"stream"``,
            ``"full_transcription"`` and ``"counts_of_words"``, or ``None``
            on the first call.
        words_list: Comma-separated words or phrases to look for.
        new_chunk: ``(sampling_rate, samples)`` pair for the latest audio
            chunk, or ``None`` when no audio has been recorded yet.

    Returns:
        A ``(state, counts_of_words, full_transcription)`` tuple. When there
        is no new audio or the transcription fails, the incoming state and
        its previous counts and transcription are returned unchanged.
    """
    import re  # local import: only needed for whole-word matching below

    print(f"state: {state}")

    if state is None:
        state = {}

    stream = state.get("stream", None)
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get("counts_of_words", {})

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        return state, previous_counts_of_words, previous_transcription

    sr, y = new_chunk

    if isinstance(words_list, str):
        # Lowercase to match the lowercased transcription, drop empty entries
        # ("".count("") would report len(text) + 1) and de-duplicate while
        # keeping the order the user typed.
        stripped_words = (word.strip().lower() for word in words_list.split(","))
        words_to_check_for = list(dict.fromkeys(w for w in stripped_words if w))
    else:
        gr.Warning("Please enter a valid list of words to check for")
        words_to_check_for = []

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalize, but leave silent or empty chunks untouched: dividing by
    # a zero peak would fill the stream with NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        # gr.Error only reaches the user when raised, which would abort the
        # handler; a warning toast lets us keep and return the previous state.
        gr.Warning(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return state, previous_counts_of_words, previous_transcription

    print(f"new transcription: {new_transcription}")

    # The stream holds all audio so far, so this text is the full transcript.
    full_transcription_text = new_transcription["text"].strip()
    full_transcription_lower = full_transcription_text.lower()

    # Count whole words/phrases only, so "um" does not match "umbrella".
    new_counts_of_words = {
        word: len(
            re.findall(
                rf"(?<!\w){re.escape(word)}(?!\w)", full_transcription_lower
            )
        )
        for word in words_to_check_for
    }

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
    }

    print(f"new state: {new_state}")

    return new_state, new_counts_of_words, full_transcription_text
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Live streaming interface: the hidden "state" input/output carries the
# accumulated audio, transcription and counts between successive calls to
# transcribe(); the textbox supplies the comma-separated words to look for.
demo = gr.Interface(
    transcribe,
    [
        "state",
        gr.Textbox(label="List of filler words"),
        gr.Audio(sources=["microphone"], streaming=True),
    ],
    ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
    live=True,
)

demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.3.0
|
| 2 |
+
transformers==4.46.0
|
| 3 |
+
torchaudio==2.5.0
|