Author: co63oc
Date: 2025-02-26 14:18:36 +08:00
parent bed770248b
commit 4268f5466b
9 changed files with 14 additions and 14 deletions

View File

@@ -980,7 +980,7 @@ class Embedding(torch.nn.Module):
         # Embeddings.
         words_embeddings = self.word_embeddings(input_ids)
         embeddings = words_embeddings
-        # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
+        # Data format change to avoid explicit transposes : [b s h] --> [s b h].
         embeddings = embeddings.transpose(0, 1).contiguous()
         # If the input flag for fp32 residual connection is set, convert for float.
         if self.fp32_residual_connection:
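For context on the line touched above: Megatron-style models move embeddings to a sequence-first layout once, up front, so later kernels avoid per-layer transposes. A minimal sketch of that layout change, with illustrative shapes not taken from the patched file:

import torch

b, s, h = 4, 128, 1024                                # batch, seq, hidden
embeddings = torch.randn(b, s, h)                     # [b, s, h]
# One up-front transpose to sequence-first; .contiguous() re-packs memory
# so downstream views and kernels see a dense [s, b, h] tensor.
embeddings = embeddings.transpose(0, 1).contiguous()  # [s, b, h]
assert embeddings.shape == (s, b, h)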

View File

@@ -398,7 +398,7 @@ class RoPE1D:
             * tokens: batch_size x ntokens x nheads x dim
             * positions: batch_size x ntokens (t position of each token)
         output:
-            * tokens after appplying RoPE2D (batch_size x ntokens x nheads x dim)
+            * tokens after applying RoPE2D (batch_size x ntokens x nheads x dim)
         """
         D = tokens.size(3)
         assert positions.ndim == 2 # Batch, Seq
@@ -428,7 +428,7 @@ class RoPE3D(RoPE1D):
             * tokens: batch_size x ntokens x nheads x dim
             * rope_positions: list of (f, h, w)
         output:
-            * tokens after appplying RoPE2D (batch_size x ntokens x nheads x dim)
+            * tokens after applying RoPE2D (batch_size x ntokens x nheads x dim)
         """
         assert sum(ch_split) == tokens.size(-1);
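The two docstrings fixed above describe the same operation: rotating token features by position-dependent angles. A minimal sketch of the usual half-dim 1D RoPE formulation, assuming the conventional base of 10000; the function name and details here are illustrative, not the patched implementation:

import torch

def apply_rope_1d(tokens, positions, base=10000.0):
    # tokens: [batch, ntokens, nheads, dim]; positions: [batch, ntokens]
    D = tokens.size(3) // 2
    inv_freq = 1.0 / (base ** (torch.arange(D).float() / D))
    angles = positions[..., None].float() * inv_freq   # [batch, ntokens, D]
    cos = angles.cos()[:, :, None, :]                   # broadcast over heads
    sin = angles.sin()[:, :, None, :]
    x1, x2 = tokens[..., :D], tokens[..., D:]
    # Rotate each (x1, x2) channel pair by its position-dependent angle.
    return torch.cat([x1 * cos - x2 * sin, x1 * sin + x2 * cos], dim=-1)

tokens = torch.randn(2, 16, 8, 64)
positions = torch.arange(16).expand(2, 16)
assert apply_rope_1d(tokens, positions).shape == tokens.shape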

View File

@@ -88,7 +88,7 @@ class LLaMaEmbedding(nn.Module):
             embeddings = embeddings.to(self.params_dtype)
             self.word_embeddings = self.word_embeddings.to(self.params_dtype)
-        # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
+        # Data format change to avoid explicit transposes : [b s h] --> [s b h].
         embeddings = embeddings.transpose(0, 1).contiguous()
         # If the input flag for fp32 residual connection is set, convert for float.
@@ -326,7 +326,7 @@ class MultiQueryAttention(nn.Module):
                 dim=-1,
             )
-            # gather on 1st dimention
+            # gather on 1st dimension
            xq = xq.view(seqlen, bsz, self.n_local_heads, self.head_dim)
            xkv = xkv.view(seqlen, bsz, self.n_local_groups, 2 * self.head_dim)
            xk, xv = xkv.chunk(2, -1)
@@ -357,7 +357,7 @@ class MultiQueryAttention(nn.Module):
            output = self.core_attention(xq, xk, xv,
                                         cu_seqlens=cu_seqlens,
                                         max_seq_len=max_seq_len)
-           # reduce-scatter only support first dimention now
+           # reduce-scatter only support first dimension now
            output = rearrange(output, "b s h d -> s b (h d)").contiguous()
         else:
            xq, xk, xv = [
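The last hunk's context shows the attention output being flattened for a sequence-first reduce-scatter. A minimal sketch of that rearrange, with illustrative shapes:

import torch
from einops import rearrange

b, s, heads, head_dim = 2, 16, 8, 64
output = torch.randn(b, s, heads, head_dim)            # [b, s, h, d]
# Merge heads and move to sequence-first [s, b, h*d], the layout the
# fixed comment says reduce-scatter currently requires.
output = rearrange(output, "b s h d -> s b (h d)").contiguous()
assert output.shape == (s, b, heads * head_dim)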

View File

@@ -55,7 +55,7 @@ class TileWorker:
     def io_scale(self, model_output, tile_size):
-        # Determine the size modification happend in forward_fn
+        # Determine the size modification happened in forward_fn
         # We only consider the same scale on height and width.
         io_scale = model_output.shape[2] / tile_size
         return io_scale
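The method above infers how much forward_fn resized a tile by comparing the output height to the input tile size. A worked example with illustrative numbers: a 64-pixel tile upsampled to 512 pixels gives io_scale = 8, and the same factor is assumed for width.

import torch

tile_size = 64
model_output = torch.zeros(1, 3, 512, 512)    # [n, c, height, width]
io_scale = model_output.shape[2] / tile_size  # 512 / 64 = 8.0
assert io_scale == 8.0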

View File

@@ -16,7 +16,7 @@ class OmniGenCache(DynamicCache):
     def __init__(self, num_tokens_for_img: int, offload_kv_cache: bool=False) -> None:
         if not torch.cuda.is_available():
-            print("No avaliable GPU, offload_kv_cache wiil be set to False, which will result in large memory usage and time cost when input multiple images!!!")
+            print("No available GPU, offload_kv_cache will be set to False, which will result in large memory usage and time cost when input multiple images!!!")
             offload_kv_cache = False
             raise RuntimeError("OffloadedCache can only be used with a GPU")
         super().__init__()
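The constructor above guards KV-cache offloading: moving cached keys and values to CPU between decoding steps only makes sense when a GPU holds the active cache, so the class refuses to run without one. A minimal sketch of that guard as a standalone check; the helper name is hypothetical:

import torch

def check_offload_support(offload_kv_cache: bool = False) -> bool:
    # KV-cache offloading shuttles tensors between GPU and CPU; without
    # a GPU there is nothing to offload from, so fail fast.
    if offload_kv_cache and not torch.cuda.is_available():
        raise RuntimeError("OffloadedCache can only be used with a GPU")
    return offload_kv_cache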