add pre-compiled beta cuda kernel (rwkv-beta==0.8.5, 40%+ faster for fp16) (thanks to #180, pre-compiled kernel of RTX 40 Series will be included later)
This commit is contained in:
1
backend-python/rwkv_pip/beta/model.py
vendored
1
backend-python/rwkv_pip/beta/model.py
vendored
@@ -94,6 +94,7 @@ if os.environ.get("RWKV_CUDA_ON") == "1":
|
||||
f"{current_path}/cuda/att_one_v5.cu",
|
||||
],
|
||||
verbose=True,
|
||||
extra_ldflags=["cublas.lib"],
|
||||
extra_cuda_cflags=[
|
||||
"-t 4",
|
||||
"-std=c++17",
|
||||
|
||||
BIN
backend-python/rwkv_pip/beta/wkv_cuda.pyd
vendored
Normal file
BIN
backend-python/rwkv_pip/beta/wkv_cuda.pyd
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user