Commit: limit 4s CPU
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title:
+title: Wav2small 2.0
 emoji: 🎵
 colorFrom: blue
 colorTo: pink
app.py CHANGED

@@ -369,7 +369,7 @@ ax.spines['left'].set_visible(False)

 def process_audio(audio_filepath):
     if audio_filepath is None:
-
+
         return fig_error, fig_error

     waveform, sample_rate = librosa.load(audio_filepath, sr=None)

@@ -380,20 +380,20 @@ def process_audio(audio_filepath):
     else:
         resampled_waveform_np = waveform[None, :]

-    x = torch.from_numpy(resampled_waveform_np).to(torch.float)
+    x = torch.from_numpy(resampled_waveform_np[:, :64000]).to(torch.float)  # only 4s for speed

     with torch.no_grad():

         logits_dawn = dawn(x).cpu().numpy()[0, :]

-        logits_wavlm = base(x).cpu().numpy()[0, :]
+        logits_wavlm = base(x).cpu().numpy()[0, :]

         # 17K params
         logits_wav2small = wav2small(x).cpu().numpy()[0, :]


     # --- Plot 1: Wav2Vec2 vs Wav2Small Teacher Outputs ---
-
+
     fig, ax = plt.subplots(figsize=(10, 6))

     left_bars_data = logits_dawn.clip(0, 1)

@@ -460,7 +460,7 @@ def process_audio(audio_filepath):

     fig_2, ax_2 = plt.subplots(figsize=(10, 6))

-
+
     left_bars_data = logits_wavlm.clip(0, 1)
     right_bars_data = (.5 * logits_dawn + .5 * logits_wavlm).clip(0, 1)

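As context for the 4-second cap, below is a minimal standalone sketch of the truncation step, assuming the Space feeds the models 16 kHz audio (the resampling itself happens earlier in process_audio and is outside this hunk): at 16000 samples per second, 4 s corresponds to the [:, :64000] slice added by this commit. The SAMPLE_RATE, MAX_SECONDS, and random stand-in waveform below are illustrative and not part of app.py.

import numpy as np
import torch

SAMPLE_RATE = 16_000                     # assumed model sample rate (not shown in this diff)
MAX_SECONDS = 4                          # the commit caps inference at 4 s of audio per request
MAX_SAMPLES = SAMPLE_RATE * MAX_SECONDS  # 64_000, matching the [:, :64000] slice

# Stand-in for the resampled waveform built earlier in process_audio,
# shaped (1, num_samples) just like waveform[None, :].
resampled_waveform_np = np.random.randn(1, 10 * SAMPLE_RATE).astype(np.float32)

# Keep only the first 4 seconds before building the model input tensor,
# so per-request latency stays bounded on the CPU tier.
x = torch.from_numpy(resampled_waveform_np[:, :MAX_SAMPLES]).to(torch.float)
print(x.shape)  # torch.Size([1, 64000])

If an uploaded clip is shorter than 4 s, the slice is a no-op (NumPy slicing past the end of an axis just returns what is there), so this cap needs no padding logic of its own.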