app.py CHANGED
@@ -49,6 +49,16 @@ def save_spectrogram_image(spectrogram, filename):
 
 @spaces.GPU
 def infer(prompt, progress=gr.Progress(track_tqdm=True)):
+    """
+    Generate audio from a textual prompt using AuffusionPipeline.
+
+    Args:
+        prompt (str): Text description of the desired audio content.
+        progress (gr.Progress, optional): Progress tracker for UI feedback.
+
+    Returns:
+        str: The file path to the generated WAV audio file.
+    """
     pipeline = AuffusionPipeline.from_pretrained("auffusion/auffusion")
     prompt = prompt
     output = pipeline(prompt=prompt)
@@ -59,6 +69,21 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
 
 @spaces.GPU
 def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
+    """
+    Perform audio-to-audio transformation with image-to-image style generation.
+
+    Args:
+        prompt (str): Text prompt guiding the audio transformation.
+        audio_path (str): File path to the input WAV audio reference.
+        desired_strength (float): Strength of prompt influence in [0.0, 1.0].
+        progress (gr.Progress, optional): Progress tracker for UI feedback.
+
+    Returns:
+        tuple:
+            - str: File path of the generated output WAV audio.
+            - str: File path of the input spectrogram image (PNG).
+            - str: File path of the output spectrogram image (PNG).
+    """
 
     # Load your audio file
     input_audio, original_sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate
@@ -165,6 +190,22 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
 
 @spaces.GPU
 def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
+    """
+    Perform audio inpainting with a masked spectrogram region guided by a prompt.
+
+    Args:
+        prompt (str): Text prompt describing the desired inpainted audio content.
+        audio_path (str): File path to the input WAV audio reference.
+        mask_start_point (int): Start index of the mask region in the spectrogram.
+        mask_end_point (int): End index of the mask region in the spectrogram.
+        progress (gr.Progress, optional): Progress tracker for UI feedback.
+
+    Returns:
+        tuple:
+            - str: File path of the generated inpainted output WAV audio.
+            - str: File path of the input spectrogram image (PNG).
+            - PIL.Image.Image: The output spectrogram image with the inpainted region.
+    """
 
     # Load your audio file
     input_audio, original_sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate
@@ -353,7 +394,8 @@ with gr.Blocks(css=css) as demo:
         submit_btn.click(
             fn = infer,
             inputs = [prompt],
-            outputs = [audio_out]
+            outputs = [audio_out],
+            show_api=True
         )
 
     with gr.Tab("Audio-to-Audio"):
@@ -381,7 +423,8 @@ with gr.Blocks(css=css) as demo:
         submit_btn_img2img.click(
             fn = infer_img2img,
             inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
-            outputs = [audio_out_img2img, input_spectrogram, output_spectrogram]
+            outputs = [audio_out_img2img, input_spectrogram, output_spectrogram],
+            show_api=True
         )
 
     with gr.Tab("Audio InPainting"):
@@ -419,25 +462,29 @@ with gr.Blocks(css=css) as demo:
         audio_in_inp.upload(
             fn = load_input_spectrogram,
             inputs = [audio_in_inp],
-            outputs = [audio_in_spec]
+            outputs = [audio_in_spec],
+            show_api=False
         )
 
         audio_in_inp.stop_recording(
             fn = load_input_spectrogram,
             inputs = [audio_in_inp],
-            outputs = [audio_in_spec]
+            outputs = [audio_in_spec],
+            show_api=False
         )
 
         preview_mask_btn.click(
             fn = preview_masked_area,
             inputs = [audio_in_inp, mask_start_point, mask_end_point],
-            outputs = [masked_spec_preview]
+            outputs = [masked_spec_preview],
+            show_api=False
         )
 
         submit_btn_inp.click(
             fn = infer_inp,
             inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
-            outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
+            outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp],
+            show_api=False
         )
 
-demo.queue().launch(
+demo.queue().launch(ssr_mode=False, mcp_server=True, show_error=True)
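
Note on the change: the three inference listeners now pass show_api=True while the helper listeners (spectrogram loading and mask preview) pass show_api=False, so only the inference endpoints are published in the Space's API. A minimal sketch of calling one of them with the gradio_client package follows; the Space id "auffusion/auffusion-demo" is hypothetical, so substitute the real one.

    from gradio_client import Client

    # Connect to the running Space (hypothetical id; replace with the actual Space).
    client = Client("auffusion/auffusion-demo")

    # Gradio names the endpoint after the bound function, so the listener
    # registered with fn=infer is exposed as "/infer". The gr.Progress
    # parameter is UI-only and is not part of the API inputs.
    wav_path = client.predict(
        "a dog barking in the rain",  # prompt
        api_name="/infer",
    )
    print(wav_path)  # local path to the generated WAV file

With mcp_server=True in launch(), recent Gradio releases also serve these same endpoints as MCP tools (mounted under /gradio_api/mcp/ on the Space host), which is what makes the Space report itself as MCP-ready.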