Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ import re
|
|
| 14 |
import scipy.io.wavfile
|
| 15 |
|
| 16 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 17 |
-
model_id = "openai/whisper-
|
| 18 |
|
| 19 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 20 |
model_id, low_cpu_mem_usage=True, use_safetensors=True
|
|
@@ -31,7 +31,7 @@ pipe = pipeline(
|
|
| 31 |
max_new_tokens=128,
|
| 32 |
chunk_length_s=30,
|
| 33 |
batch_size=8,
|
| 34 |
-
|
| 35 |
)
|
| 36 |
|
| 37 |
|
|
@@ -39,24 +39,24 @@ arabic_bad_Words = pd.read_csv("arabic_bad_words_dataset.csv")
|
|
| 39 |
english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
|
| 40 |
|
| 41 |
|
| 42 |
-
def load_audio(file: str, sr: int = 16000):
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
| 60 |
|
| 61 |
def clean_english_word(word):
|
| 62 |
cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word)
|
|
|
|
| 14 |
import scipy.io.wavfile
|
| 15 |
|
| 16 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 17 |
+
model_id = "openai/whisper-large-v3"
|
| 18 |
|
| 19 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 20 |
model_id, low_cpu_mem_usage=True, use_safetensors=True
|
|
|
|
| 31 |
max_new_tokens=128,
|
| 32 |
chunk_length_s=30,
|
| 33 |
batch_size=8,
|
| 34 |
+
device=device,
|
| 35 |
)
|
| 36 |
|
| 37 |
|
|
|
|
| 39 |
english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
|
| 40 |
|
| 41 |
|
| 42 |
+
# def load_audio(file: str, sr: int = 16000):
|
| 43 |
+
# try:
|
| 44 |
+
# # This reads the audio from the video file without creating a separate audio file
|
| 45 |
+
# command = [
|
| 46 |
+
# "ffmpeg",
|
| 47 |
+
# "-i", file,
|
| 48 |
+
# "-f", "s16le",
|
| 49 |
+
# "-acodec", "pcm_s16le",
|
| 50 |
+
# "-ar", str(sr),
|
| 51 |
+
# "-ac", "1",
|
| 52 |
+
# "-"
|
| 53 |
+
# ]
|
| 54 |
|
| 55 |
+
# out = subprocess.run(command, capture_output=True, check=True).stdout
|
| 56 |
+
# except subprocess.CalledProcessError as e:
|
| 57 |
+
# raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
|
| 58 |
|
| 59 |
+
# return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
|
| 60 |
|
| 61 |
def clean_english_word(word):
|
| 62 |
cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word)
|