Build (aarch64)
Browse files- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_ops.py +9 -0
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
from ._ops import ops
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA kernel.

    Thin wrapper forwarding to the compiled ``ops.get_mla_metadata`` op.

    Args:
        seqlens_k: Per-sequence key lengths.
        s_q: Query length (presumably tokens per sequence -- confirm against kernel docs).
        h_kv: Number of KV heads.

    Returns:
        Whatever the native op returns (tile scheduler metadata and split counts
        -- exact structure defined by the extension).
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the MLA multi-head-attention forward pass over a paged KV cache.

    Thin wrapper that forwards every argument, in order, to the compiled
    ``ops.mha_fwd_kvcache_mla`` op; all semantics live in the extension.
    """
    # Gather positional arguments once, then splat into the native call.
    call_args = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*call_args)
|
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1e97fef62f5ebbe6b19b0d5fbe700fcdf6b9acd7a54cba6f0b1d23665188fa9
|
| 3 |
+
size 2643848
|
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _flash_mla_341ab77
|
| 3 |
+
ops = torch.ops._flash_mla_341ab77
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_flash_mla_341ab77::" + op_name
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
from ._ops import ops
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA kernel.

    Thin wrapper forwarding to the compiled ``ops.get_mla_metadata`` op.

    Args:
        seqlens_k: Per-sequence key lengths.
        s_q: Query length (presumably tokens per sequence -- confirm against kernel docs).
        h_kv: Number of KV heads.

    Returns:
        Whatever the native op returns (tile scheduler metadata and split counts
        -- exact structure defined by the extension).
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the MLA multi-head-attention forward pass over a paged KV cache.

    Thin wrapper that forwards every argument, in order, to the compiled
    ``ops.mha_fwd_kvcache_mla`` op; all semantics live in the extension.
    """
    # Gather positional arguments once, then splat into the native call.
    call_args = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*call_args)
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f15b3b0bd0bee56760bd6500175ca5a1fd17f2742ef9496c28ea3720d038c66
|
| 3 |
+
size 2640208
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _flash_mla_341ab77
|
| 3 |
+
ops = torch.ops._flash_mla_341ab77
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_flash_mla_341ab77::" + op_name
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
from ._ops import ops
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA kernel.

    Thin wrapper forwarding to the compiled ``ops.get_mla_metadata`` op.

    Args:
        seqlens_k: Per-sequence key lengths.
        s_q: Query length (presumably tokens per sequence -- confirm against kernel docs).
        h_kv: Number of KV heads.

    Returns:
        Whatever the native op returns (tile scheduler metadata and split counts
        -- exact structure defined by the extension).
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the MLA multi-head-attention forward pass over a paged KV cache.

    Thin wrapper that forwards every argument, in order, to the compiled
    ``ops.mha_fwd_kvcache_mla`` op; all semantics live in the extension.
    """
    # Gather positional arguments once, then splat into the native call.
    call_args = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*call_args)
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb925b062d31034672a45d925a3767d953e97a3c6c483467e6b81833d42b5a27
|
| 3 |
+
size 2644048
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _flash_mla_341ab77
|
| 3 |
+
ops = torch.ops._flash_mla_341ab77
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_flash_mla_341ab77::" + op_name
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
from ._ops import ops
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA kernel.

    Thin wrapper forwarding to the compiled ``ops.get_mla_metadata`` op.

    Args:
        seqlens_k: Per-sequence key lengths.
        s_q: Query length (presumably tokens per sequence -- confirm against kernel docs).
        h_kv: Number of KV heads.

    Returns:
        Whatever the native op returns (tile scheduler metadata and split counts
        -- exact structure defined by the extension).
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the MLA multi-head-attention forward pass over a paged KV cache.

    Thin wrapper that forwards every argument, in order, to the compiled
    ``ops.mha_fwd_kvcache_mla`` op; all semantics live in the extension.
    """
    # Gather positional arguments once, then splat into the native call.
    call_args = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*call_args)
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7776c629263bc0b32b82b8a094ead0749d6c393b6ca25c9ffa812bd8fbdb3002
|
| 3 |
+
size 2709472
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _flash_mla_341ab77
|
| 3 |
+
ops = torch.ops._flash_mla_341ab77
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_flash_mla_341ab77::" + op_name
|