Mirror of https://github.com/modelscope/DiffSynth-Studio.git (synced 2026-03-18 22:08:13 +00:00)
Merge pull request #1191 from Feng0w0/wan_rope
[model][NPU]: Wan model rope uses torch.complex64 on NPU
@@ -1,2 +1,2 @@
 from .npu_compatible_device import parse_device_type, parse_nccl_backend, get_available_device_type, get_device_name
-from .npu_compatible_device import IS_NPU_AVAILABLE
+from .npu_compatible_device import IS_NPU_AVAILABLE, IS_CUDA_AVAILABLE
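For readers not familiar with these flags, here is a minimal sketch of how a module like npu_compatible_device might define IS_NPU_AVAILABLE and IS_CUDA_AVAILABLE; the repository's actual implementation may differ, and the torch_npu calls are assumptions based on its CUDA-mirroring API.

import importlib.util
import torch

# Sketch only: the real npu_compatible_device module may compute these differently.
IS_CUDA_AVAILABLE = torch.cuda.is_available()
IS_NPU_AVAILABLE = importlib.util.find_spec("torch_npu") is not None
if IS_NPU_AVAILABLE:
    import torch_npu  # registers the "npu" device type with PyTorch
    IS_NPU_AVAILABLE = torch_npu.npu.is_available()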
@@ -5,6 +5,7 @@ import math
 from typing import Tuple, Optional
 from einops import rearrange
 from .wan_video_camera_controller import SimpleAdapter

 try:
     import flash_attn_interface
     FLASH_ATTN_3_AVAILABLE = True
@@ -92,6 +93,7 @@ def rope_apply(x, freqs, num_heads):
     x = rearrange(x, "b s (n d) -> b s n d", n=num_heads)
     x_out = torch.view_as_complex(x.to(torch.float64).reshape(
         x.shape[0], x.shape[1], x.shape[2], -1, 2))
+    freqs = freqs.to(torch.complex64) if freqs.device == "npu" else freqs
     x_out = torch.view_as_real(x_out * freqs).flatten(2)
     return x_out.to(x.dtype)
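For context, a minimal, self-contained sketch of rope_apply as it reads after this change. The tensor shapes and the einops pattern come from the hunk above; the NPU branch is written as an explicit check on freqs.device.type for readability, whereas the merged line compares the device object to the string "npu". This is an illustrative sketch, not the repository's exact code.

import torch
from einops import rearrange

def rope_apply(x, freqs, num_heads):
    # (b, s, n*d) -> (b, s, n, d): split the hidden dim into attention heads
    x = rearrange(x, "b s (n d) -> b s n d", n=num_heads)
    # Pair adjacent channels and view them as complex numbers
    # (float64 pairs become complex128)
    x_out = torch.view_as_complex(
        x.to(torch.float64).reshape(x.shape[0], x.shape[1], x.shape[2], -1, 2)
    )
    # The change in this PR: keep the rotary frequency table in complex64
    # when running on an Ascend NPU.
    if freqs.device.type == "npu":
        freqs = freqs.to(torch.complex64)
    # Rotate by the frequencies, view back as real pairs, flatten the heads
    x_out = torch.view_as_real(x_out * freqs).flatten(2)
    return x_out.to(x.dtype)

# Quick shape check on CPU:
b, s, n, d = 1, 4, 2, 8
x = torch.randn(b, s, n * d)
freqs = torch.polar(torch.ones(s, 1, d // 2, dtype=torch.float64),
                    torch.randn(s, 1, d // 2, dtype=torch.float64))
print(rope_apply(x, freqs, n).shape)  # torch.Size([1, 4, 16])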
@@ -50,7 +50,7 @@ def rope_apply(x, freqs, num_heads):
     sp_rank = get_sequence_parallel_rank()
     freqs = pad_freqs(freqs, s_per_rank * sp_size)
     freqs_rank = freqs[(sp_rank * s_per_rank):((sp_rank + 1) * s_per_rank), :, :]
+    freqs_rank = freqs_rank.to(torch.complex64) if freqs_rank.device == "npu" else freqs_rank
     x_out = torch.view_as_real(x_out * freqs_rank).flatten(2)
     return x_out.to(x.dtype)
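The sequence-parallel path applies the same idea after each rank slices out its chunk of the padded frequency table. Below is a hedged sketch of just that slicing-and-downcast step; pad_freqs and the get_sequence_parallel_* helpers in the hunk above belong to the repository's xfuser integration, so a hypothetical stand-in is used here.

import torch

def slice_rank_freqs(freqs, s_per_rank, sp_rank, sp_size):
    # Hypothetical stand-in for pad_freqs: pad the rotary table with 1+0j so
    # every rank receives an equally sized slice (multiplying by 1 leaves the
    # padded positions unchanged).
    total = s_per_rank * sp_size
    if freqs.shape[0] < total:
        pad = freqs.new_ones(total - freqs.shape[0], *freqs.shape[1:])
        freqs = torch.cat([freqs, pad], dim=0)
    # Each rank keeps only its contiguous block of positions.
    freqs_rank = freqs[sp_rank * s_per_rank:(sp_rank + 1) * s_per_rank]
    # The change in this PR: downcast the per-rank frequencies to complex64 on NPU.
    if freqs_rank.device.type == "npu":
        freqs_rank = freqs_rank.to(torch.complex64)
    return freqs_rank

Note that the merged lines test freqs.device == "npu"; depending on the PyTorch version, a device that carries an index (for example npu:0) may not compare equal to the bare string "npu", so checking device.type, as in these sketches, is the more explicit form.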