diff --git a/README.md b/README.md index 12365a5..bd7984d 100644 --- a/README.md +++ b/README.md @@ -50,18 +50,10 @@ DiffSynth Studio is a Diffusion engine. We have restructured architectures inclu ## Installation -Create Python environment: - ``` -conda env create -f environment.yml -``` - -We find that sometimes `conda` cannot install `cupy` correctly, please install it manually. See [this document](https://docs.cupy.dev/en/stable/install.html) for more details. - -Enter the Python environment: - -``` -conda activate DiffSynthStudio +git clone https://github.com/modelscope/DiffSynth-Studio.git +cd DiffSynth-Studio +pip install -e . ``` ## Usage (in Python code) diff --git a/diffsynth/models/__init__.py b/diffsynth/models/__init__.py index 21757e9..ba2f601 100644 --- a/diffsynth/models/__init__.py +++ b/diffsynth/models/__init__.py @@ -1,5 +1,9 @@ import torch, os from safetensors import safe_open +from typing_extensions import Literal, TypeAlias +from typing import List + +from .downloader import download_from_huggingface, download_from_modelscope from .sd_text_encoder import SDTextEncoder from .sd_unet import SDUNet @@ -29,13 +33,89 @@ from .hunyuan_dit_text_encoder import HunyuanDiTCLIPTextEncoder, HunyuanDiTT5Tex from .hunyuan_dit import HunyuanDiT +preset_models_on_huggingface = { + "HunyuanDiT": [ + ("Tencent-Hunyuan/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"), + ("Tencent-Hunyuan/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"), + ("Tencent-Hunyuan/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"), + ("Tencent-Hunyuan/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"), + ], + "stable-video-diffusion-img2vid-xt": [ + ("stabilityai/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"), + ], + "ExVideo-SVD-128f-v1": [ + ("ECNU-CILab/ExVideo-SVD-128f-v1", 
"model.fp16.safetensors", "models/stable_video_diffusion"), + ], +} +preset_models_on_modelscope = { + "HunyuanDiT": [ + ("modelscope/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"), + ("modelscope/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"), + ("modelscope/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"), + ("modelscope/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"), + ], + "stable-video-diffusion-img2vid-xt": [ + ("AI-ModelScope/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"), + ], + "ExVideo-SVD-128f-v1": [ + ("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"), + ], +} +Preset_model_id: TypeAlias = Literal[ + "HunyuanDiT", + "stable-video-diffusion-img2vid-xt", + "ExVideo-SVD-128f-v1" +] +Preset_model_website: TypeAlias = Literal[ + "HuggingFace", + "ModelScope", +] +website_to_preset_models = { + "HuggingFace": preset_models_on_huggingface, + "ModelScope": preset_models_on_modelscope, +} +website_to_download_fn = { + "HuggingFace": download_from_huggingface, + "ModelScope": download_from_modelscope, +} + + class ModelManager: - def __init__(self, torch_dtype=torch.float16, device="cuda"): + def __init__( + self, + torch_dtype=torch.float16, + device="cuda", + model_id_list: List[Preset_model_id] = [], + downloading_priority: List[Preset_model_website] = ["ModelScope", "HuggingFace"], + file_path_list: List[str] = [], + ): self.torch_dtype = torch_dtype self.device = device self.model = {} self.model_path = {} self.textual_inversion_dict = {} + downloaded_files = self.download_models(model_id_list, downloading_priority) + self.load_models(downloaded_files + file_path_list) + + def download_models( + self, + model_id_list: List[Preset_model_id] = [], + downloading_priority: List[Preset_model_website] = 
["ModelScope", "HuggingFace"], + ): + downloaded_files = [] + for model_id in model_id_list: + for website in downloading_priority: + if model_id in website_to_preset_models[website]: + for model_id, origin_file_path, local_dir in website_to_preset_models[website][model_id]: + # Check if the file is downloaded. + file_to_download = os.path.join(local_dir, os.path.basename(origin_file_path)) + if file_to_download in downloaded_files: + continue + # Download + website_to_download_fn[website](model_id, origin_file_path, local_dir) + if os.path.basename(origin_file_path) in os.listdir(local_dir): + downloaded_files.append(file_to_download) + return downloaded_files def is_stable_video_diffusion(self, state_dict): param_name = "model.diffusion_model.output_blocks.9.1.time_stack.0.norm_in.weight" diff --git a/diffsynth/models/downloader.py b/diffsynth/models/downloader.py new file mode 100644 index 0000000..603dea0 --- /dev/null +++ b/diffsynth/models/downloader.py @@ -0,0 +1,278 @@ +from huggingface_hub import hf_hub_download +from http.cookiejar import CookieJar +from pathlib import Path +from typing import Dict, Optional, List, Union +import copy, uuid, requests, io, platform, pickle, os, urllib +from requests.adapters import Retry +from tqdm import tqdm + + +def _get_sep(path): + if isinstance(path, bytes): + return b'/' + else: + return '/' + + +def expanduser(path): + """Expand ~ and ~user constructions. 
def expanduser(path):
    """Expand an initial ``~`` or ``~user`` component of *path*.

    Mirrors ``posixpath.expanduser``: accepts str, bytes or os.PathLike and
    returns the path unchanged when the user or ``$HOME`` cannot be resolved.
    """
    path = os.fspath(path)
    is_bytes = isinstance(path, bytes)
    tilde = b'~' if is_bytes else '~'
    if not path.startswith(tilde):
        return path
    # Separator of the matching type (inlines the original _get_sep helper).
    sep = b'/' if is_bytes else '/'
    end = path.find(sep, 1)
    if end < 0:
        end = len(path)
    if end == 1:
        # Bare "~": prefer $HOME, then fall back to the password database.
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            import pwd
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: current uid missing from the password database.
                return path
    else:
        # "~user": resolve the named user's home directory.
        import pwd
        name = path[1:end]
        if isinstance(name, bytes):
            name = str(name, 'ASCII')
        try:
            userhome = pwd.getpwnam(name).pw_dir
        except KeyError:
            # bpo-10496: named user missing from the password database.
            return path
    if is_bytes:
        userhome = os.fsencode(userhome)
    userhome = userhome.rstrip(sep)
    return (userhome + path[end:]) or sep


class ModelScopeConfig:
    """Access to the locally cached ModelScope credentials (``~/.modelscope``)."""

    DEFAULT_CREDENTIALS_PATH = Path.home().joinpath('.modelscope', 'credentials')
    path_credential = expanduser(DEFAULT_CREDENTIALS_PATH)
    COOKIES_FILE_NAME = 'cookies'
    GIT_TOKEN_FILE_NAME = 'git_token'
    USER_INFO_FILE_NAME = 'user'
    USER_SESSION_ID_FILE_NAME = 'session'

    @staticmethod
    def make_sure_credential_path_exist():
        # Create the credentials directory lazily.
        os.makedirs(ModelScopeConfig.path_credential, exist_ok=True)

    @staticmethod
    def get_user_session_id():
        """Return the cached session id, creating and persisting one if absent."""
        session_path = os.path.join(ModelScopeConfig.path_credential,
                                    ModelScopeConfig.USER_SESSION_ID_FILE_NAME)
        if os.path.exists(session_path):
            # An existing session file wins, even if its content looks odd.
            with open(session_path, 'rb') as session_file:
                return str(session_file.readline().strip(), encoding='utf-8')
        fresh_id = str(uuid.uuid4().hex)
        ModelScopeConfig.make_sure_credential_path_exist()
        with open(session_path, 'w+') as session_file:
            session_file.write(fresh_id)
        return fresh_id

    @staticmethod
    def get_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
        """Build the telemetry user-agent string for ModelScope requests.

        Args:
            user_agent: extra info as a dict or a single string, appended
                to the generated user-agent.

        Returns:
            The formatted user-agent string.
        """
        # Extra telemetry when executing in dedicated cloud containers.
        env = os.environ.get('MODELSCOPE_ENVIRONMENT', 'custom')
        user_name = os.environ.get('MODELSCOPE_USERNAME', 'unknown')
        ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % (
            '1.15.0',
            platform.python_version(),
            ModelScopeConfig.get_user_session_id(),
            platform.platform(),
            platform.processor(),
            env,
            user_name,
        )
        if isinstance(user_agent, dict):
            ua += '; ' + '; '.join(f'{k}/{v}' for k, v in user_agent.items())
        elif isinstance(user_agent, str):
            ua += '; ' + user_agent
        return ua

    @staticmethod
    def get_cookies():
        """Return the pickled login cookies, or ``None`` when not logged in."""
        cookies_path = os.path.join(ModelScopeConfig.path_credential,
                                    ModelScopeConfig.COOKIES_FILE_NAME)
        if not os.path.exists(cookies_path):
            return None
        # NOTE(review): pickle.load on the local credentials file — trusted input.
        with open(cookies_path, 'rb') as cookies_file:
            return pickle.load(cookies_file)


def modelscope_http_get_model_file(
    url: str,
    local_dir: str,
    file_name: str,
    file_size: int,
    cookies: CookieJar,
    headers: Optional[Dict[str, str]] = None,
):
    """Download ``url`` into ``local_dir/file_name``, resuming a partial file
    and retrying up to 5 times (backoff 0.5s, 1s, 2s, 4s) before giving up.

    Args:
        url: actual download url of the file.
        local_dir: directory the downloaded file is written to.
        file_name: name of the file inside ``local_dir``.
        file_size: expected size in bytes. NOTE(review): callers here pass 0,
            so the progress-bar total and the Range end are meaningless; the
            server presumably streams the whole file — confirm.
        cookies: authentication cookies, used for private repos.
        headers: extra http headers to send with the request.
    """
    request_headers = {} if headers is None else copy.deepcopy(headers)
    request_headers['X-Request-ID'] = str(uuid.uuid4().hex)
    target_path = os.path.join(local_dir, file_name)
    retry = Retry(total=5, backoff_factor=1, allowed_methods=['GET'])
    while True:
        try:
            progress = tqdm(
                unit='B', unit_scale=True, unit_divisor=1024,
                total=file_size, initial=0, desc='Downloading',
            )
            resumed = 0
            if os.path.exists(target_path):
                # A partial file exists: continue where it stopped.
                with open(target_path, 'rb') as partial:
                    resumed = partial.seek(0, io.SEEK_END)
                progress.update(resumed)
            if resumed > file_size:
                break
            request_headers['Range'] = 'bytes=%s-%s' % (resumed, file_size - 1)
            with open(target_path, 'ab') as out:
                response = requests.get(
                    url, stream=True, headers=request_headers,
                    cookies=cookies, timeout=60)
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # filter out keep-alive new chunks
                        progress.update(len(chunk))
                        out.write(chunk)
            progress.close()
            break
        except Exception as err:
            # Whatever went wrong, consume one retry slot and back off.
            retry = retry.increment('GET', url, error=err)
            retry.sleep()


def get_endpoint():
    """Return the ModelScope endpoint; ``$MODELSCOPE_DOMAIN`` overrides the default."""
    domain = os.getenv('MODELSCOPE_DOMAIN', 'www.modelscope.cn')
    return 'https://' + domain
def get_endpoint():
    """Return the ModelScope endpoint; ``$MODELSCOPE_DOMAIN`` overrides the default."""
    domain = os.getenv('MODELSCOPE_DOMAIN', 'www.modelscope.cn')
    return 'https://' + domain


def get_file_download_url(model_id: str, file_path: str, revision: str):
    """Format a file download url from `model_id`, `revision` and `file_path`.

    e.g., given ``model_id="john/bert"``, ``revision="master"`` and
    ``file_path="README.md"``, the resulting url is:
    https://modelscope.cn/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md

    Args:
        model_id (str): The model id on ModelScope.
        file_path (str): Path of the file inside the repository.
        revision (str): Repository revision (branch/tag/commit).

    Returns:
        str: The file download url.
    """
    # quote_plus escapes "/" (and spaces as "+") so the path fits in a query value.
    file_path = urllib.parse.quote_plus(file_path)
    revision = urllib.parse.quote_plus(revision)
    download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
    return download_url_template.format(
        endpoint=get_endpoint(),
        model_id=model_id,
        revision=revision,
        file_path=file_path,
    )


def download_from_modelscope(model_id, origin_file_path, local_dir):
    """Download one file of `model_id` from ModelScope into `local_dir`.

    Skips the download when a file of the same name already exists.
    """
    os.makedirs(local_dir, exist_ok=True)
    file_name = os.path.basename(origin_file_path)
    # Direct existence check instead of scanning the whole directory listing.
    if os.path.exists(os.path.join(local_dir, file_name)):
        print(f"{file_name} has been already in {local_dir}.")
        return
    print(f"Start downloading {os.path.join(local_dir, file_name)}")
    headers = {'user-agent': ModelScopeConfig.get_user_agent(user_agent=None)}
    cookies = ModelScopeConfig.get_cookies()
    url = get_file_download_url(model_id=model_id, file_path=origin_file_path, revision="master")
    modelscope_http_get_model_file(
        url,
        local_dir,
        file_name,
        file_size=0,  # size unknown up front; the downloader streams to EOF
        headers=headers,
        cookies=cookies
    )


def download_from_huggingface(model_id, origin_file_path, local_dir):
    """Download one file of `model_id` from HuggingFace into `local_dir`.

    Skips the download when a file of the same name already exists.
    """
    os.makedirs(local_dir, exist_ok=True)
    file_name = os.path.basename(origin_file_path)
    # Direct existence check instead of scanning the whole directory listing.
    if os.path.exists(os.path.join(local_dir, file_name)):
        print(f"{file_name} has been already in {local_dir}.")
        return
    print(f"Start downloading {os.path.join(local_dir, file_name)}")
    hf_hub_download(model_id, origin_file_path, local_dir=local_dir)
a/diffsynth/prompts/hunyuan_dit_prompter.py +++ b/diffsynth/prompts/hunyuan_dit_prompter.py @@ -1,14 +1,20 @@ from .utils import Prompter from transformers import BertModel, T5EncoderModel, BertTokenizer, AutoTokenizer -import warnings +import warnings, os class HunyuanDiTPrompter(Prompter): def __init__( self, - tokenizer_path="configs/hunyuan_dit/tokenizer", - tokenizer_t5_path="configs/hunyuan_dit/tokenizer_t5" + tokenizer_path=None, + tokenizer_t5_path=None ): + if tokenizer_path is None: + base_path = os.path.dirname(os.path.dirname(__file__)) + tokenizer_path = os.path.join(base_path, "tokenizer_configs/hunyuan_dit/tokenizer") + if tokenizer_t5_path is None: + base_path = os.path.dirname(os.path.dirname(__file__)) + tokenizer_t5_path = os.path.join(base_path, "tokenizer_configs/hunyuan_dit/tokenizer_t5") super().__init__() self.tokenizer = BertTokenizer.from_pretrained(tokenizer_path) with warnings.catch_warnings(): diff --git a/diffsynth/prompts/sd_prompter.py b/diffsynth/prompts/sd_prompter.py index 6d4407c..9dea60f 100644 --- a/diffsynth/prompts/sd_prompter.py +++ b/diffsynth/prompts/sd_prompter.py @@ -1,10 +1,14 @@ from .utils import Prompter, tokenize_long_prompt from transformers import CLIPTokenizer from ..models import SDTextEncoder +import os class SDPrompter(Prompter): - def __init__(self, tokenizer_path="configs/stable_diffusion/tokenizer"): + def __init__(self, tokenizer_path=None): + if tokenizer_path is None: + base_path = os.path.dirname(os.path.dirname(__file__)) + tokenizer_path = os.path.join(base_path, "tokenizer_configs/stable_diffusion/tokenizer") super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path) diff --git a/diffsynth/prompts/sdxl_prompter.py b/diffsynth/prompts/sdxl_prompter.py index 44b721e..e313db2 100644 --- a/diffsynth/prompts/sdxl_prompter.py +++ b/diffsynth/prompts/sdxl_prompter.py @@ -1,15 +1,21 @@ from .utils import Prompter, tokenize_long_prompt from transformers import CLIPTokenizer from 
..models import SDXLTextEncoder, SDXLTextEncoder2 -import torch +import torch, os class SDXLPrompter(Prompter): def __init__( self, - tokenizer_path="configs/stable_diffusion/tokenizer", - tokenizer_2_path="configs/stable_diffusion_xl/tokenizer_2" + tokenizer_path=None, + tokenizer_2_path=None ): + if tokenizer_path is None: + base_path = os.path.dirname(os.path.dirname(__file__)) + tokenizer_path = os.path.join(base_path, "tokenizer_configs/stable_diffusion/tokenizer") + if tokenizer_2_path is None: + base_path = os.path.dirname(os.path.dirname(__file__)) + tokenizer_2_path = os.path.join(base_path, "tokenizer_configs/stable_diffusion_xl/tokenizer_2") super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path) self.tokenizer_2 = CLIPTokenizer.from_pretrained(tokenizer_2_path) diff --git a/diffsynth/prompts/utils.py b/diffsynth/prompts/utils.py index f041228..ef51fa1 100644 --- a/diffsynth/prompts/utils.py +++ b/diffsynth/prompts/utils.py @@ -36,7 +36,7 @@ def tokenize_long_prompt(tokenizer, prompt): class BeautifulPrompt: - def __init__(self, tokenizer_path="configs/beautiful_prompt/tokenizer", model=None): + def __init__(self, tokenizer_path=None, model=None): self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) self.model = model self.template = 'Instruction: Give a simple description of the image to generate a drawing prompt.\nInput: {raw_prompt}\nOutput:' @@ -62,7 +62,7 @@ class BeautifulPrompt: class Translator: - def __init__(self, tokenizer_path="configs/translator/tokenizer", model=None): + def __init__(self, tokenizer_path=None, model=None): self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) self.model = model diff --git a/configs/hunyuan_dit/tokenizer/special_tokens_map.json b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json similarity index 100% rename from configs/hunyuan_dit/tokenizer/special_tokens_map.json rename to 
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json diff --git a/configs/hunyuan_dit/tokenizer/tokenizer_config.json b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json similarity index 100% rename from configs/hunyuan_dit/tokenizer/tokenizer_config.json rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json diff --git a/configs/hunyuan_dit/tokenizer/vocab.txt b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt similarity index 100% rename from configs/hunyuan_dit/tokenizer/vocab.txt rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt diff --git a/configs/hunyuan_dit/tokenizer/vocab_org.txt b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt similarity index 100% rename from configs/hunyuan_dit/tokenizer/vocab_org.txt rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt diff --git a/configs/hunyuan_dit/tokenizer_t5/config.json b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json similarity index 100% rename from configs/hunyuan_dit/tokenizer_t5/config.json rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json diff --git a/configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json similarity index 100% rename from configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json diff --git a/configs/hunyuan_dit/tokenizer_t5/spiece.model b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model similarity index 100% rename from configs/hunyuan_dit/tokenizer_t5/spiece.model rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model diff --git a/configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json b/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json similarity index 100% rename from 
configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json rename to diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json diff --git a/configs/stable_diffusion/tokenizer/merges.txt b/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt similarity index 100% rename from configs/stable_diffusion/tokenizer/merges.txt rename to diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt diff --git a/configs/stable_diffusion/tokenizer/special_tokens_map.json b/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json similarity index 100% rename from configs/stable_diffusion/tokenizer/special_tokens_map.json rename to diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json diff --git a/configs/stable_diffusion/tokenizer/tokenizer_config.json b/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json similarity index 100% rename from configs/stable_diffusion/tokenizer/tokenizer_config.json rename to diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json diff --git a/configs/stable_diffusion/tokenizer/vocab.json b/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json similarity index 100% rename from configs/stable_diffusion/tokenizer/vocab.json rename to diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json diff --git a/configs/stable_diffusion_xl/tokenizer_2/merges.txt b/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt similarity index 100% rename from configs/stable_diffusion_xl/tokenizer_2/merges.txt rename to diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt diff --git a/configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json b/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json similarity index 100% rename from configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json rename to 
diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json diff --git a/configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json b/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json similarity index 100% rename from configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json rename to diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json diff --git a/configs/stable_diffusion_xl/tokenizer_2/vocab.json b/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json similarity index 100% rename from configs/stable_diffusion_xl/tokenizer_2/vocab.json rename to diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json diff --git a/examples/ExVideo/ExVideo_svd_test.py b/examples/ExVideo/ExVideo_svd_test.py index 25afd5a..0f5e797 100644 --- a/examples/ExVideo/ExVideo_svd_test.py +++ b/examples/ExVideo/ExVideo_svd_test.py @@ -2,6 +2,8 @@ from diffsynth import save_video, ModelManager, SVDVideoPipeline, HunyuanDiTImag from diffsynth import ModelManager import torch, os +# The models will be downloaded automatically. +# You can also use the following urls to download them manually. 
# Download models (from Huggingface) # Text-to-image model: @@ -14,7 +16,6 @@ import torch, os # ExVideo extension blocks: # `models/stable_video_diffusion/model.fp16.safetensors`: [link](https://huggingface.co/ECNU-CILab/ExVideo-SVD-128f-v1/resolve/main/model.fp16.safetensors) - # Download models (from Modelscope) # Text-to-image model: # `models/HunyuanDiT/t2i/clip_text_encoder/pytorch_model.bin`: [link](https://www.modelscope.cn/api/v1/models/modelscope/HunyuanDiT/repo?Revision=master&FilePath=t2i%2Fclip_text_encoder%2Fpytorch_model.bin) @@ -30,13 +31,7 @@ import torch, os def generate_image(): # Load models os.environ["TOKENIZERS_PARALLELISM"] = "True" - model_manager = ModelManager(torch_dtype=torch.float16, device="cuda") - model_manager.load_models([ - "models/HunyuanDiT/t2i/clip_text_encoder/pytorch_model.bin", - "models/HunyuanDiT/t2i/mt5/pytorch_model.bin", - "models/HunyuanDiT/t2i/model/pytorch_model_ema.pt", - "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin" - ]) + model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["HunyuanDiT"]) pipe = HunyuanDiTImagePipeline.from_model_manager(model_manager) # Generate an image @@ -46,16 +41,13 @@ def generate_image(): negative_prompt="错误的眼睛,糟糕的人脸,毁容,糟糕的艺术,变形,多余的肢体,模糊的颜色,模糊,重复,病态,残缺,", num_inference_steps=50, height=1024, width=1024, ) + model_manager.to("cpu") return image def generate_video(image): # Load models - model_manager = ModelManager(torch_dtype=torch.float16, device="cuda") - model_manager.load_models([ - "models/stable_video_diffusion/svd_xt.safetensors", - "models/stable_video_diffusion/model.fp16.safetensors" - ]) + model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"]) pipe = SVDVideoPipeline.from_model_manager(model_manager) # Generate a video @@ -67,16 +59,13 @@ def generate_video(image): num_inference_steps=50, min_cfg_scale=2, max_cfg_scale=2, 
contrast_enhance_scale=1.2 ) + model_manager.to("cpu") return video def upscale_video(image, video): # Load models - model_manager = ModelManager(torch_dtype=torch.float16, device="cuda") - model_manager.load_models([ - "models/stable_video_diffusion/svd_xt.safetensors", - "models/stable_video_diffusion/model.fp16.safetensors", - ]) + model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"]) pipe = SVDVideoPipeline.from_model_manager(model_manager) # Generate a video @@ -89,19 +78,20 @@ def upscale_video(image, video): num_inference_steps=25, min_cfg_scale=2, max_cfg_scale=2, contrast_enhance_scale=1.2 ) + model_manager.to("cpu") return video -# We use Hunyuan DiT to generate the first frame. +# We use Hunyuan DiT to generate the first frame. 10GB VRAM is required. # If you want to use your own image, # please use `image = Image.open("your_image_file.png")` to replace the following code. image = generate_image() image.save("image.png") -# Now, generate a video with resolution of 512. +# Now, generate a video with resolution of 512. 20GB VRAM is required. video = generate_video(image) save_video(video, "video_512.mp4", fps=30) -# Upscale the video. +# Upscale the video. 52GB VRAM is required. 
import os
import pkg_resources
from setuptools import setup, find_packages


# Parse install requirements from requirements.txt, closing the file
# (the previous version left the handle open).
with open(os.path.join(os.path.dirname(__file__), "requirements.txt")) as requirements_file:
    install_requires = [str(r) for r in pkg_resources.parse_requirements(requirements_file)]

setup(
    name="diffsynth",
    version="1.0.0",
    description="",
    author="Artiprocher",
    # The previous version combined ``py_modules=["diffsynth"]`` (there is no
    # top-level ``diffsynth.py`` — diffsynth is a package) with
    # ``find_packages(exclude=["diffsynth"])``, which excluded the root
    # ``diffsynth`` package while still matching its subpackages, so
    # ``import diffsynth`` failed from a regular (non-editable) install.
    packages=find_packages(),
    install_requires=install_requires,
    include_package_data=True
)