Align the Wan tokenizer with the official implementation

This commit is contained in:
Artiprocher
2025-02-28 15:50:07 +08:00
parent 6fa8dbe077
commit 61a30673c2
3 changed files with 7 additions and 0 deletions

View File

@@ -785,6 +785,7 @@ class WanVideoVAE(nn.Module):
video = self.single_decode(hidden_state, device) video = self.single_decode(hidden_state, device)
video = video.squeeze(0) video = video.squeeze(0)
videos.append(video) videos.append(video)
videos = torch.stack(videos)
return videos return videos

View File

@@ -2,20 +2,24 @@ from .base_prompter import BasePrompter
from ..models.wan_video_text_encoder import WanTextEncoder from ..models.wan_video_text_encoder import WanTextEncoder
from transformers import AutoTokenizer from transformers import AutoTokenizer
import os, torch import os, torch
import ftfy
import html import html
import string import string
import regex as re import regex as re
def basic_clean(text):
    """Normalize raw prompt text before tokenization.

    Runs the text through ``ftfy.fix_text``, resolves HTML entities, and
    trims leading/trailing whitespace.

    Args:
        text: The input string to clean.

    Returns:
        The cleaned string.
    """
    # Presumably repairs mojibake / broken Unicode; see the ftfy docs for
    # the exact set of fixes applied.
    text = ftfy.fix_text(text)
    # Unescape twice so doubly-escaped entities (e.g. "&amp;lt;") resolve fully.
    text = html.unescape(html.unescape(text))
    return text.strip()
def whitespace_clean(text):
    """Collapse each run of whitespace into one space and trim both ends.

    Args:
        text: The input string.

    Returns:
        ``text`` with every whitespace run replaced by a single space and
        no leading or trailing whitespace.
    """
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def canonicalize(text, keep_punctuation_exact_string=None): def canonicalize(text, keep_punctuation_exact_string=None):
text = text.replace('_', ' ') text = text.replace('_', ' ')
if keep_punctuation_exact_string: if keep_punctuation_exact_string:
@@ -28,6 +32,7 @@ def canonicalize(text, keep_punctuation_exact_string=None):
text = re.sub(r'\s+', ' ', text) text = re.sub(r'\s+', ' ', text)
return text.strip() return text.strip()
class HuggingfaceTokenizer: class HuggingfaceTokenizer:
def __init__(self, name, seq_len=None, clean=None, **kwargs): def __init__(self, name, seq_len=None, clean=None, **kwargs):

View File

@@ -10,3 +10,4 @@ einops
sentencepiece sentencepiece
protobuf protobuf
modelscope modelscope
ftfy