Update app.py
app.py
CHANGED
@@ -37,24 +37,33 @@ def var_reference_attention(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, m
 # Test function
 # ============================================================
 def run_flash_attention(B=2, S=5, H=4, D=8, seed=42):
-
-
-
+    B, S, H, D = int(B), int(S), int(H), int(D)
+    torch.manual_seed(int(seed))
+
+    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+    q = k = v = torch.randn(B, S, H, D, device=device, dtype=dtype)
 
+    log = io.StringIO()
     with contextlib.redirect_stdout(log):
         print(f"Running FlashAttention Tests on device: {device}")
         print(f"Input shape: B={B}, S={S}, H={H}, D={D}\n")
 
         # Standard attention
         out_ref = reference_attention(q, k, v)
-
+        try:
+            out_flash = flash_attn["flash_attn_func"](q, k, v, causal=False)
+        except TypeError:
+            out_flash, _ = flash_attn["flash_attn_func"](q, k, v, causal=False)
         print("1. Standard attention:")
         print(f"   Reference: {out_ref.shape}, Flash: {out_flash.shape}")
         print(f"   Outputs close: {torch.allclose(out_flash, out_ref, atol=1e-2, rtol=1e-3)}\n")
 
         # Causal attention
         out_ref_causal = reference_attention(q, k, v, causal=True)
-
+        try:
+            out_causal = flash_attn["flash_attn_func"](q, k, v, causal=True)
+        except TypeError:
+            out_causal, _ = flash_attn["flash_attn_func"](q, k, v, causal=True)
         print("2. Causal attention:")
         print(f"   Reference: {out_ref_causal.shape}, Flash: {out_causal.shape}")
         print(f"   Outputs close: {torch.allclose(out_causal, out_ref_causal, atol=1e-2, rtol=1e-3)}\n")
@@ -66,7 +75,7 @@ with gr.Blocks(title="Flash Attention Kernel Tester") as demo:
 # ============================================================
 with gr.Blocks(title="Flash Attention Kernel Tester") as demo:
     gr.Markdown("## ⚡ Flash Attention Kernel Tester")
-    gr.Markdown("
+    gr.Markdown("Compare PyTorch SDPA vs FlashAttention implementations interactively.")
 
     with gr.Row():
         B = gr.Slider(1, 8, value=2, step=1, label="Batch Size (B)")
@@ -77,7 +86,7 @@ with gr.Blocks(title="Flash Attention Kernel Tester") as demo:
     seed = gr.Number(value=42, label="Random Seed")
 
     run_btn = gr.Button("🚀 Run Tests")
-    output = gr.Textbox(label="Console Output", lines=25)
+    output = gr.Textbox(label="Console Output", lines=25, show_copy_button=True)
 
     run_btn.click(run_flash_attention, inputs=[B, S, H, D, seed], outputs=output)
 
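Note on the new setup lines: the dtype pick (torch.bfloat16 on CUDA, torch.float32 otherwise) matches FlashAttention's half-precision input requirement on GPU, q = k = v aliases a single random tensor (fine for a smoke test), and log = io.StringIO() exists so everything the tests print comes back as one string for the Gradio Textbox. A minimal sketch of that capture pattern, with illustrative names (run_and_capture is not part of this commit):

import contextlib
import io

def run_and_capture():
    # Redirect print() output into an in-memory buffer for the
    # duration of the block, then return the collected text.
    log = io.StringIO()
    with contextlib.redirect_stdout(log):
        print("Running FlashAttention Tests on device: cpu")
    return log.getvalue()

print(run_and_capture())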
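The UI edits are plumbing for the same flow: the click handler runs the tests and the returned log string fills a now-copyable Textbox. A stripped-down sketch of that wiring (demo-only names, not the Space's full layout):

import gradio as gr

def run_tests(seed):
    # Stand-in for run_flash_attention: return the text to display.
    return f"ran with seed={int(seed)}"

with gr.Blocks() as demo:
    seed = gr.Number(value=42, label="Random Seed")
    run_btn = gr.Button("Run Tests")
    output = gr.Textbox(label="Console Output", lines=5, show_copy_button=True)
    run_btn.click(run_tests, inputs=[seed], outputs=output)

if __name__ == "__main__":
    demo.launch()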