Merge pull request #374 from modelscope/wan-tokenizer-bugfix

Align the Wan tokenizer with the official implementation
This commit is contained in:
Zhongjie Duan
2025-02-28 16:05:36 +08:00
committed by GitHub
3 changed files with 7 additions and 0 deletions

View File

@@ -785,6 +785,7 @@ class WanVideoVAE(nn.Module):
video = self.single_decode(hidden_state, device)
video = video.squeeze(0)
videos.append(video)
videos = torch.stack(videos)
return videos

View File

@@ -2,20 +2,24 @@ from .base_prompter import BasePrompter
from ..models.wan_video_text_encoder import WanTextEncoder
from transformers import AutoTokenizer
import os, torch
import ftfy
import html
import string
import regex as re
def basic_clean(text):
    """Repair mojibake via ftfy, unescape HTML entities, and trim whitespace.

    The double ``html.unescape`` call is deliberate: it also resolves
    doubly-escaped entities such as ``&amp;amp;``.
    """
    fixed = ftfy.fix_text(text)
    unescaped = html.unescape(html.unescape(fixed))
    return unescaped.strip()
def whitespace_clean(text):
    """Collapse every run of whitespace to a single space and trim the ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def canonicalize(text, keep_punctuation_exact_string=None):
text = text.replace('_', ' ')
if keep_punctuation_exact_string:
@@ -28,6 +32,7 @@ def canonicalize(text, keep_punctuation_exact_string=None):
text = re.sub(r'\s+', ' ', text)
return text.strip()
class HuggingfaceTokenizer:
def __init__(self, name, seq_len=None, clean=None, **kwargs):