mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-19 06:48:12 +00:00
@@ -86,6 +86,7 @@ huggingface_model_loader_configs = [
|
||||
("ChatGLMModel", "diffsynth.models.kolors_text_encoder", "kolors_text_encoder", None),
|
||||
("MarianMTModel", "transformers.models.marian.modeling_marian", "translator", None),
|
||||
("BloomForCausalLM", "transformers.models.bloom.modeling_bloom", "beautiful_prompt", None),
|
||||
("LlamaForCausalLM", "transformers.models.llama.modeling_llama", "omost_prompt", None),
|
||||
("T5EncoderModel", "diffsynth.models.flux_text_encoder", "flux_text_encoder_2", "FluxTextEncoder2"),
|
||||
("CogVideoXTransformer3DModel", "diffsynth.models.cog_dit", "cog_dit", "CogDiT"),
|
||||
]
|
||||
@@ -227,6 +228,18 @@ preset_models_on_modelscope = {
|
||||
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||
],
|
||||
# Omost prompt
|
||||
"OmostPrompt":[
|
||||
("Omost/omost-llama-3-8b-4bits", "model-00001-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "model-00002-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "tokenizer.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "tokenizer_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "generation_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "model.safetensors.index.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
("Omost/omost-llama-3-8b-4bits", "special_tokens_map.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
|
||||
],
|
||||
|
||||
# Translator
|
||||
"opus-mt-zh-en": [
|
||||
("moxying/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"),
|
||||
@@ -325,6 +338,7 @@ Preset_model_id: TypeAlias = Literal[
|
||||
"ControlNet_union_sdxl_promax",
|
||||
"FLUX.1-dev",
|
||||
"SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0",
|
||||
"OmostPrompt",
|
||||
"ESRGAN_x4",
|
||||
"RIFE",
|
||||
"CogVideoX-5B",
|
||||
|
||||
@@ -119,7 +119,10 @@ def load_model_from_huggingface_folder(file_path, model_names, model_classes, to
|
||||
model = model_class.from_pretrained(file_path, torch_dtype=torch_dtype).eval()
|
||||
if torch_dtype == torch.float16 and hasattr(model, "half"):
|
||||
model = model.half()
|
||||
model = model.to(device=device)
|
||||
try:
|
||||
model = model.to(device=device)
|
||||
except:
|
||||
pass
|
||||
loaded_model_names.append(model_name)
|
||||
loaded_models.append(model)
|
||||
return loaded_model_names, loaded_models
|
||||
|
||||
@@ -50,4 +50,13 @@ class BasePipeline(torch.nn.Module):
|
||||
noise_pred_locals = [inference_callback(prompt_emb_local) for prompt_emb_local in prompt_emb_locals]
|
||||
noise_pred = self.merge_latents(noise_pred_global, noise_pred_locals, masks, mask_scales)
|
||||
return noise_pred
|
||||
|
||||
|
||||
    def extend_prompt(self, prompt, local_prompts, masks, mask_scales):
        """Run the prompter's extenders over *prompt* and merge the results.

        The extender (e.g. an Omost LLM) may replace the global prompt and
        contribute extra region prompts with matching masks. Each new mask is
        given a fixed weight of 5.0.

        NOTE: `local_prompts`, `masks` and `mask_scales` are extended IN PLACE
        via `+=`, so the caller's lists are mutated as well as returned.
        """
        extended_prompt_dict = self.prompter.extend_prompt(prompt)
        # Fall back to the original prompt if the extender produced none.
        prompt = extended_prompt_dict.get("prompt", prompt)
        local_prompts += extended_prompt_dict.get("prompts", [])
        masks += extended_prompt_dict.get("masks", [])
        # One scale per newly added mask; 5.0 is the hard-coded region weight.
        mask_scales += [5.0] * len(extended_prompt_dict.get("masks", []))
        return prompt, local_prompts, masks, mask_scales
|
||||
@@ -25,7 +25,7 @@ class FluxImagePipeline(BasePipeline):
|
||||
return self.dit
|
||||
|
||||
|
||||
def fetch_models(self, model_manager: ModelManager, prompt_refiner_classes=[]):
|
||||
def fetch_models(self, model_manager: ModelManager, prompt_refiner_classes=[], prompt_extender_classes=[]):
|
||||
self.text_encoder_1 = model_manager.fetch_model("flux_text_encoder_1")
|
||||
self.text_encoder_2 = model_manager.fetch_model("flux_text_encoder_2")
|
||||
self.dit = model_manager.fetch_model("flux_dit")
|
||||
@@ -33,15 +33,16 @@ class FluxImagePipeline(BasePipeline):
|
||||
self.vae_encoder = model_manager.fetch_model("flux_vae_encoder")
|
||||
self.prompter.fetch_models(self.text_encoder_1, self.text_encoder_2)
|
||||
self.prompter.load_prompt_refiners(model_manager, prompt_refiner_classes)
|
||||
self.prompter.load_prompt_extenders(model_manager, prompt_extender_classes)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def from_model_manager(model_manager: ModelManager, prompt_refiner_classes=[]):
|
||||
def from_model_manager(model_manager: ModelManager, prompt_refiner_classes=[],prompt_extender_classes=[]):
|
||||
pipe = FluxImagePipeline(
|
||||
device=model_manager.device,
|
||||
torch_dtype=model_manager.torch_dtype,
|
||||
)
|
||||
pipe.fetch_models(model_manager, prompt_refiner_classes)
|
||||
pipe.fetch_models(model_manager, prompt_refiner_classes,prompt_extender_classes)
|
||||
return pipe
|
||||
|
||||
|
||||
@@ -105,6 +106,9 @@ class FluxImagePipeline(BasePipeline):
|
||||
else:
|
||||
latents = torch.randn((1, 16, height//8, width//8), device=self.device, dtype=self.torch_dtype)
|
||||
|
||||
# Extend prompt
|
||||
prompt, local_prompts, masks, mask_scales = self.extend_prompt(prompt, local_prompts, masks, mask_scales)
|
||||
|
||||
# Encode prompts
|
||||
prompt_emb_posi = self.encode_prompt(prompt, positive=True)
|
||||
if cfg_scale != 1.0:
|
||||
|
||||
@@ -5,4 +5,5 @@ from .sd3_prompter import SD3Prompter
|
||||
from .hunyuan_dit_prompter import HunyuanDiTPrompter
|
||||
from .kolors_prompter import KolorsPrompter
|
||||
from .flux_prompter import FluxPrompter
|
||||
from .omost import OmostPromter
|
||||
from .cog_prompter import CogPrompter
|
||||
|
||||
@@ -37,14 +37,20 @@ def tokenize_long_prompt(tokenizer, prompt, max_length=None):
|
||||
|
||||
|
||||
class BasePrompter:
|
||||
def __init__(self, refiners=[]):
|
||||
def __init__(self, refiners=[], extenders=[]):
|
||||
self.refiners = refiners
|
||||
self.extenders = extenders
|
||||
|
||||
|
||||
def load_prompt_refiners(self, model_nameger: ModelManager, refiner_classes=[]):
|
||||
def load_prompt_refiners(self, model_manager: ModelManager, refiner_classes=[]):
|
||||
for refiner_class in refiner_classes:
|
||||
refiner = refiner_class.from_model_manager(model_nameger)
|
||||
refiner = refiner_class.from_model_manager(model_manager)
|
||||
self.refiners.append(refiner)
|
||||
|
||||
    def load_prompt_extenders(self,model_manager:ModelManager,extender_classes=[]):
        """Instantiate each extender class from *model_manager* and append it to self.extenders.

        Mirrors `load_prompt_refiners`; `extender_classes` is only iterated,
        never mutated, so the mutable default is harmless here.
        """
        for extender_class in extender_classes:
            extender = extender_class.from_model_manager(model_manager)
            self.extenders.append(extender)
|
||||
|
||||
@torch.no_grad()
|
||||
@@ -55,3 +61,10 @@ class BasePrompter:
|
||||
for refiner in self.refiners:
|
||||
prompt = refiner(prompt, positive=positive)
|
||||
return prompt
|
||||
|
||||
    @torch.no_grad()
    def extend_prompt(self, prompt:str, positive=True):
        """Pipe *prompt* through every registered extender and return the final dict.

        Starts from `{"prompt": prompt}`; each extender receives the previous
        dict and may add keys (e.g. "prompts", "masks").

        NOTE(review): `positive` is currently unused — kept for signature
        symmetry with the refiner path; confirm before removing.
        """
        extended_prompt = dict(prompt=prompt)
        for extender in self.extenders:
            extended_prompt = extender(extended_prompt)
        return extended_prompt
||||
311
diffsynth/prompters/omost.py
Normal file
311
diffsynth/prompters/omost.py
Normal file
@@ -0,0 +1,311 @@
|
||||
from transformers import AutoTokenizer, TextIteratorStreamer
|
||||
import difflib
|
||||
import torch
|
||||
import numpy as np
|
||||
import re
|
||||
from ..models.model_manager import ModelManager
|
||||
from PIL import Image
|
||||
|
||||
# Fixed vocabularies used by `closest_name` to snap free-form LLM output onto
# canonical phrases. Keys are the exact strings the Omost prompt format
# expects; values are the concrete numbers each phrase maps to.

valid_colors = { # r, g, b
    'aliceblue': (240, 248, 255), 'antiquewhite': (250, 235, 215), 'aqua': (0, 255, 255),
    'aquamarine': (127, 255, 212), 'azure': (240, 255, 255), 'beige': (245, 245, 220),
    'bisque': (255, 228, 196), 'black': (0, 0, 0), 'blanchedalmond': (255, 235, 205), 'blue': (0, 0, 255),
    'blueviolet': (138, 43, 226), 'brown': (165, 42, 42), 'burlywood': (222, 184, 135),
    'cadetblue': (95, 158, 160), 'chartreuse': (127, 255, 0), 'chocolate': (210, 105, 30),
    'coral': (255, 127, 80), 'cornflowerblue': (100, 149, 237), 'cornsilk': (255, 248, 220),
    'crimson': (220, 20, 60), 'cyan': (0, 255, 255), 'darkblue': (0, 0, 139), 'darkcyan': (0, 139, 139),
    'darkgoldenrod': (184, 134, 11), 'darkgray': (169, 169, 169), 'darkgrey': (169, 169, 169),
    'darkgreen': (0, 100, 0), 'darkkhaki': (189, 183, 107), 'darkmagenta': (139, 0, 139),
    'darkolivegreen': (85, 107, 47), 'darkorange': (255, 140, 0), 'darkorchid': (153, 50, 204),
    'darkred': (139, 0, 0), 'darksalmon': (233, 150, 122), 'darkseagreen': (143, 188, 143),
    'darkslateblue': (72, 61, 139), 'darkslategray': (47, 79, 79), 'darkslategrey': (47, 79, 79),
    'darkturquoise': (0, 206, 209), 'darkviolet': (148, 0, 211), 'deeppink': (255, 20, 147),
    'deepskyblue': (0, 191, 255), 'dimgray': (105, 105, 105), 'dimgrey': (105, 105, 105),
    'dodgerblue': (30, 144, 255), 'firebrick': (178, 34, 34), 'floralwhite': (255, 250, 240),
    'forestgreen': (34, 139, 34), 'fuchsia': (255, 0, 255), 'gainsboro': (220, 220, 220),
    'ghostwhite': (248, 248, 255), 'gold': (255, 215, 0), 'goldenrod': (218, 165, 32),
    'gray': (128, 128, 128), 'grey': (128, 128, 128), 'green': (0, 128, 0), 'greenyellow': (173, 255, 47),
    'honeydew': (240, 255, 240), 'hotpink': (255, 105, 180), 'indianred': (205, 92, 92),
    'indigo': (75, 0, 130), 'ivory': (255, 255, 240), 'khaki': (240, 230, 140), 'lavender': (230, 230, 250),
    'lavenderblush': (255, 240, 245), 'lawngreen': (124, 252, 0), 'lemonchiffon': (255, 250, 205),
    'lightblue': (173, 216, 230), 'lightcoral': (240, 128, 128), 'lightcyan': (224, 255, 255),
    'lightgoldenrodyellow': (250, 250, 210), 'lightgray': (211, 211, 211), 'lightgrey': (211, 211, 211),
    'lightgreen': (144, 238, 144), 'lightpink': (255, 182, 193), 'lightsalmon': (255, 160, 122),
    'lightseagreen': (32, 178, 170), 'lightskyblue': (135, 206, 250), 'lightslategray': (119, 136, 153),
    'lightslategrey': (119, 136, 153), 'lightsteelblue': (176, 196, 222), 'lightyellow': (255, 255, 224),
    'lime': (0, 255, 0), 'limegreen': (50, 205, 50), 'linen': (250, 240, 230), 'magenta': (255, 0, 255),
    'maroon': (128, 0, 0), 'mediumaquamarine': (102, 205, 170), 'mediumblue': (0, 0, 205),
    'mediumorchid': (186, 85, 211), 'mediumpurple': (147, 112, 219), 'mediumseagreen': (60, 179, 113),
    'mediumslateblue': (123, 104, 238), 'mediumspringgreen': (0, 250, 154),
    'mediumturquoise': (72, 209, 204), 'mediumvioletred': (199, 21, 133), 'midnightblue': (25, 25, 112),
    'mintcream': (245, 255, 250), 'mistyrose': (255, 228, 225), 'moccasin': (255, 228, 181),
    'navajowhite': (255, 222, 173), 'navy': (0, 0, 128), 'navyblue': (0, 0, 128),
    'oldlace': (253, 245, 230), 'olive': (128, 128, 0), 'olivedrab': (107, 142, 35),
    'orange': (255, 165, 0), 'orangered': (255, 69, 0), 'orchid': (218, 112, 214),
    'palegoldenrod': (238, 232, 170), 'palegreen': (152, 251, 152), 'paleturquoise': (175, 238, 238),
    'palevioletred': (219, 112, 147), 'papayawhip': (255, 239, 213), 'peachpuff': (255, 218, 185),
    'peru': (205, 133, 63), 'pink': (255, 192, 203), 'plum': (221, 160, 221), 'powderblue': (176, 224, 230),
    'purple': (128, 0, 128), 'rebeccapurple': (102, 51, 153), 'red': (255, 0, 0),
    'rosybrown': (188, 143, 143), 'royalblue': (65, 105, 225), 'saddlebrown': (139, 69, 19),
    'salmon': (250, 128, 114), 'sandybrown': (244, 164, 96), 'seagreen': (46, 139, 87),
    'seashell': (255, 245, 238), 'sienna': (160, 82, 45), 'silver': (192, 192, 192),
    'skyblue': (135, 206, 235), 'slateblue': (106, 90, 205), 'slategray': (112, 128, 144),
    'slategrey': (112, 128, 144), 'snow': (255, 250, 250), 'springgreen': (0, 255, 127),
    'steelblue': (70, 130, 180), 'tan': (210, 180, 140), 'teal': (0, 128, 128), 'thistle': (216, 191, 216),
    'tomato': (255, 99, 71), 'turquoise': (64, 224, 208), 'violet': (238, 130, 238),
    'wheat': (245, 222, 179), 'white': (255, 255, 255), 'whitesmoke': (245, 245, 245),
    'yellow': (255, 255, 0), 'yellowgreen': (154, 205, 50)
}

# Region anchor centers on the 90x90 canvas grid.
valid_locations = { # x, y in 90*90
    'in the center': (45, 45),
    'on the left': (15, 45),
    'on the right': (75, 45),
    'on the top': (45, 15),
    'on the bottom': (45, 75),
    'on the top-left': (15, 15),
    'on the top-right': (75, 15),
    'on the bottom-left': (15, 75),
    'on the bottom-right': (75, 75)
}

# Small nudges applied to a location anchor, in canvas units.
valid_offsets = { # x, y in 90*90
    'no offset': (0, 0),
    'slightly to the left': (-10, 0),
    'slightly to the right': (10, 0),
    'slightly to the upper': (0, -10),
    'slightly to the lower': (0, 10),
    'slightly to the upper-left': (-10, -10),
    'slightly to the upper-right': (10, -10),
    'slightly to the lower-left': (-10, 10),
    'slightly to the lower-right': (10, 10)}

# Region extents (width, height) in canvas units.
valid_areas = { # w, h in 90*90
    "a small square area": (50, 50),
    "a small vertical area": (40, 60),
    "a small horizontal area": (60, 40),
    "a medium-sized square area": (60, 60),
    "a medium-sized vertical area": (50, 80),
    "a medium-sized horizontal area": (80, 50),
    "a large square area": (70, 70),
    "a large vertical area": (60, 90),
    "a large horizontal area": (90, 60)
}
||||
def safe_str(x):
    """Normalize a text fragment: drop stray commas/periods/spaces at both ends, then terminate with one period."""
    trimmed = x.strip(',. ')
    return trimmed + '.'
||||
def closest_name(input_str, options):
    """Fuzzy-match *input_str* (case-insensitive) against the keys of *options*.

    Returns the single best-matching key; prints a notice when the match
    differs from the lowercased input. Raises AssertionError when no key
    reaches the 0.5 similarity cutoff.
    """
    query = input_str.lower()
    candidates = difflib.get_close_matches(query, list(options.keys()), n=1, cutoff=0.5)
    assert isinstance(candidates, list) and len(candidates) > 0, f'The value [{query}] is not valid!'
    best = candidates[0]
    if best != query:
        print(f'Automatically corrected [{query}] -> [{best}].')
    return best
||||
class Canvas:
    """Scene layout assembled from Canvas-API calls emitted by the Omost LLM.

    Holds a global description (prefixes/suffixes + background color) and a
    list of local region components; `process()` renders them into a 90x90
    initial-latent image and a bag of (mask, prompt-parts) conditions.
    """

    @staticmethod
    def from_bot_response(response: str):
        """Extract the ```python fenced code block from *response* and execute it to build a Canvas.

        SECURITY NOTE: this `exec`s model-generated code. The globals dict is
        empty and locals only expose `Canvas`, but builtins are still
        reachable — only feed this trusted model output.
        """
        matched = re.search(r'```python\n(.*?)\n```', response, re.DOTALL)
        assert matched, 'Response does not contain codes!'
        code_content = matched.group(1)
        assert 'canvas = Canvas()' in code_content, 'Code block must include valid canvas var!'
        local_vars = {'Canvas': Canvas}
        exec(code_content, {}, local_vars)
        canvas = local_vars.get('canvas', None)
        assert isinstance(canvas, Canvas), 'Code block must produce valid canvas var!'
        return canvas

    def __init__(self):
        # Local region dicts appended by add_local_description.
        self.components = []
        # 1x1x3 uint8 background color, set by set_global_description.
        self.color = None
        # When True, tags/atmosphere/style/quality strings are appended to suffixes.
        self.record_tags = True
        # Global prompt parts: prefixes precede region text, suffixes follow it.
        self.prefixes = []
        self.suffixes = []
        return

    def set_global_description(self, description: str, detailed_descriptions: list[str], tags: str,
                               HTML_web_color_name: str):
        """Record the global scene description and background color."""
        assert isinstance(description, str), 'Global description is not valid!'
        assert isinstance(detailed_descriptions, list) and all(isinstance(item, str) for item in detailed_descriptions), \
            'Global detailed_descriptions is not valid!'
        assert isinstance(tags, str), 'Global tags is not valid!'

        # Snap the free-form color name onto the canonical table.
        HTML_web_color_name = closest_name(HTML_web_color_name, valid_colors)
        self.color = np.array([[valid_colors[HTML_web_color_name]]], dtype=np.uint8)

        self.prefixes = [description]
        self.suffixes = detailed_descriptions

        if self.record_tags:
            self.suffixes = self.suffixes + [tags]

        self.prefixes = [safe_str(x) for x in self.prefixes]
        self.suffixes = [safe_str(x) for x in self.suffixes]

        return

    def add_local_description(self, location: str, offset: str, area: str, distance_to_viewer: float, description: str,
                              detailed_descriptions: list[str], tags: str, atmosphere: str, style: str,
                              quality_meta: str, HTML_web_color_name: str):
        """Add one local region: a rectangle on the 90x90 grid plus its prompt parts and fill color."""
        assert isinstance(description, str), 'Local description is wrong!'
        assert isinstance(distance_to_viewer, (int, float)) and distance_to_viewer > 0, \
            f'The distance_to_viewer for [{description}] is not positive float number!'
        assert isinstance(detailed_descriptions, list) and all(isinstance(item, str) for item in detailed_descriptions), \
            f'The detailed_descriptions for [{description}] is not valid!'
        assert isinstance(tags, str), f'The tags for [{description}] is not valid!'
        assert isinstance(atmosphere, str), f'The atmosphere for [{description}] is not valid!'
        assert isinstance(style, str), f'The style for [{description}] is not valid!'
        assert isinstance(quality_meta, str), f'The quality_meta for [{description}] is not valid!'

        # Snap every free-form phrase onto its canonical vocabulary.
        location = closest_name(location, valid_locations)
        offset = closest_name(offset, valid_offsets)
        area = closest_name(area, valid_areas)
        HTML_web_color_name = closest_name(HTML_web_color_name, valid_colors)

        xb, yb = valid_locations[location]
        xo, yo = valid_offsets[offset]
        w, h = valid_areas[area]
        # rect is (y0, y1, x0, x1) — consumed as rows-then-cols slices in process().
        rect = (yb + yo - h // 2, yb + yo + h // 2, xb + xo - w // 2, xb + xo + w // 2)
        # Clamp to the 90x90 canvas.
        rect = [max(0, min(90, i)) for i in rect]
        color = np.array([[valid_colors[HTML_web_color_name]]], dtype=np.uint8)

        # Region text inherits the global prefixes; its own details become suffixes.
        prefixes = self.prefixes + [description]
        suffixes = detailed_descriptions

        if self.record_tags:
            suffixes = suffixes + [tags, atmosphere, style, quality_meta]

        prefixes = [safe_str(x) for x in prefixes]
        suffixes = [safe_str(x) for x in suffixes]

        self.components.append(dict(
            rect=rect,
            distance_to_viewer=distance_to_viewer,
            color=color,
            prefixes=prefixes,
            suffixes=suffixes
        ))

        return

    def process(self):
        """Render the recorded layout.

        Returns a dict with:
          - initial_latent: (90, 90, 3) uint8 image — background color with each
            region's color alpha-blended in (0.7 region / 0.3 background),
            painted far-to-near.
          - bag_of_conditions: list of dicts; entry 0 is the global condition
            with an all-ones mask, followed by one binary-mask entry per region.
        """
        # Sort far-to-near so closer components are painted last (on top).
        self.components = sorted(self.components, key=lambda x: x['distance_to_viewer'], reverse=True)

        # Compute the initial latent image from the global background color.
        initial_latent = np.zeros(shape=(90, 90, 3), dtype=np.float32) + self.color

        for component in self.components:
            a, b, c, d = component['rect']
            initial_latent[a:b, c:d] = 0.7 * component['color'] + 0.3 * initial_latent[a:b, c:d]

        initial_latent = initial_latent.clip(0, 255).astype(np.uint8)

        # Compute per-region conditions (mask + prompt parts).

        bag_of_conditions = [
            dict(mask=np.ones(shape=(90, 90), dtype=np.float32), prefixes=self.prefixes, suffixes=self.suffixes)
        ]

        for i, component in enumerate(self.components):
            a, b, c, d = component['rect']
            m = np.zeros(shape=(90, 90), dtype=np.float32)
            m[a:b, c:d] = 1.0
            bag_of_conditions.append(dict(
                mask=m,
                prefixes=component['prefixes'],
                suffixes=component['suffixes']
            ))

        return dict(
            initial_latent=initial_latent,
            bag_of_conditions=bag_of_conditions,
        )
|
||||
class OmostPromter(torch.nn.Module):
    """Prompt extender that asks an Omost LLM to compose a scene layout.

    Called with a prompt dict, it generates Canvas-building code with the LLM,
    executes it via `Canvas.from_bot_response`, and merges the resulting
    global/region prompts and PIL masks back into the dict.
    """

    def __init__(self,model = None,tokenizer = None, template = "",device="cpu"):
        """Wrap an LLM + tokenizer; *template* is the system prompt (a default Canvas-API description is used when empty)."""
        super().__init__()
        self.model=model
        self.tokenizer = tokenizer
        # NOTE(review): device defaults to "cpu" and from_model_manager does not
        # override it — confirm this matches where the model actually lives.
        self.device = device
        if template == "":
            template = r'''You are a helpful AI assistant to compose images using the below python class `Canvas`:
```python
class Canvas:
    def set_global_description(self, description: str, detailed_descriptions: list[str], tags: str, HTML_web_color_name: str):
        pass

    def add_local_description(self, location: str, offset: str, area: str, distance_to_viewer: float, description: str, detailed_descriptions: list[str], tags: str, atmosphere: str, style: str, quality_meta: str, HTML_web_color_name: str):
        assert location in ["in the center", "on the left", "on the right", "on the top", "on the bottom", "on the top-left", "on the top-right", "on the bottom-left", "on the bottom-right"]
        assert offset in ["no offset", "slightly to the left", "slightly to the right", "slightly to the upper", "slightly to the lower", "slightly to the upper-left", "slightly to the upper-right", "slightly to the lower-left", "slightly to the lower-right"]
        assert area in ["a small square area", "a small vertical area", "a small horizontal area", "a medium-sized square area", "a medium-sized vertical area", "a medium-sized horizontal area", "a large square area", "a large vertical area", "a large horizontal area"]
        assert distance_to_viewer > 0
        pass
```'''
        self.template = template

    @staticmethod
    def from_model_manager(model_manager: ModelManager):
        """Build an OmostPromter from the manager's "omost_prompt" model, loading the matching tokenizer from its path."""
        model, model_path = model_manager.fetch_model("omost_prompt", require_model_path=True)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        omost = OmostPromter(
            model=model,
            tokenizer=tokenizer,
        )
        return omost


    def __call__(self,prompt_dict:dict):
        """Extend prompt_dict["prompt"] into global + region prompts and masks.

        Adds/overwrites keys: "prompt", "prompts", "masks", "initial_latent",
        "bag_of_conditions". The input dict is updated in place and returned.
        """
        raw_prompt=prompt_dict["prompt"]
        # Chat: system message carries the Canvas API spec, user message the raw prompt.
        conversation = [{"role": "system", "content": self.template}]
        conversation.append({"role": "user", "content": raw_prompt})

        input_ids = self.tokenizer.apply_chat_template(conversation, return_tensors="pt", add_generation_prompt=True).to(self.device)
        streamer = TextIteratorStreamer(self.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            # stopping_criteria=stopping_criteria,
            # max_new_tokens=max_new_tokens,
            do_sample=True,
            # temperature=temperature,
            # top_p=top_p,
        )
        # NOTE(review): generate() runs synchronously, so the streamer queue is
        # fully populated before the loop below drains it. TextIteratorStreamer
        # is normally paired with a background thread — confirm this pattern is
        # intended (the 10s timeout only matters if the queue were empty).
        self.model.generate(**generate_kwargs)
        outputs = []
        for text in streamer:
            outputs.append(text)
        llm_outputs = "".join(outputs)

        # Execute the generated Canvas code and render the layout.
        canvas = Canvas.from_bot_response(llm_outputs)
        canvas_output = canvas.process()

        # Entry 0 is the global condition; the rest are per-region prompts.
        prompts = [" ".join(_["prefixes"]+_["suffixes"]) for _ in canvas_output["bag_of_conditions"]]
        canvas_output["prompt"] = prompts[0]
        canvas_output["prompts"] = prompts[1:]

        # Convert each float mask to a 3-channel uint8 PIL image.
        # NOTE: `mask[mask>0.5]=255` mutates the arrays stored inside
        # canvas_output["bag_of_conditions"] in place.
        raw_masks = [_["mask"] for _ in canvas_output["bag_of_conditions"]]
        masks=[]
        for mask in raw_masks:
            mask[mask>0.5]=255
            mask = np.stack([mask] * 3, axis=-1).astype("uint8")
            masks.append(Image.fromarray(mask))

        canvas_output["masks"] = masks

        prompt_dict.update(canvas_output)
        return prompt_dict
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from transformers import AutoTokenizer
|
||||
from ..models.model_manager import ModelManager
|
||||
import torch
|
||||
|
||||
|
||||
from .omost import OmostPromter
|
||||
|
||||
class BeautifulPrompt(torch.nn.Module):
|
||||
def __init__(self, tokenizer_path=None, model=None, template=""):
|
||||
@@ -13,8 +12,8 @@ class BeautifulPrompt(torch.nn.Module):
|
||||
|
||||
|
||||
@staticmethod
|
||||
def from_model_manager(model_nameger: ModelManager):
|
||||
model, model_path = model_nameger.fetch_model("beautiful_prompt", require_model_path=True)
|
||||
def from_model_manager(model_manager: ModelManager):
|
||||
model, model_path = model_manager.fetch_model("beautiful_prompt", require_model_path=True)
|
||||
template = 'Instruction: Give a simple description of the image to generate a drawing prompt.\nInput: {raw_prompt}\nOutput:'
|
||||
if model_path.endswith("v2"):
|
||||
template = """Converts a simple image description into a prompt. \
|
||||
@@ -63,8 +62,8 @@ class Translator(torch.nn.Module):
|
||||
|
||||
|
||||
@staticmethod
|
||||
def from_model_manager(model_nameger: ModelManager):
|
||||
model, model_path = model_nameger.fetch_model("translator", require_model_path=True)
|
||||
def from_model_manager(model_manager: ModelManager):
|
||||
model, model_path = model_manager.fetch_model("translator", require_model_path=True)
|
||||
translator = Translator(tokenizer_path=model_path, model=model)
|
||||
return translator
|
||||
|
||||
|
||||
24
examples/image_synthesis/omost_flux_text_to_image.py
Normal file
24
examples/image_synthesis/omost_flux_text_to_image.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# Example: text-to-image with FLUX.1-dev, using the Omost LLM as a prompt
# extender that expands a short prompt into a global description plus
# per-region prompts and masks.
import torch
from diffsynth import download_models, ModelManager, OmostPromter, FluxImagePipeline


# Download the Omost prompt-extender LLM and the FLUX.1-dev weights.
download_models(["OmostPrompt"])
download_models(["FLUX.1-dev"])

model_manager = ModelManager(torch_dtype=torch.bfloat16)
model_manager.load_models([
    "models/OmostPrompt/omost-llama-3-8b-4bits",
    "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors",
    "models/FLUX/FLUX.1-dev/text_encoder_2",
    "models/FLUX/FLUX.1-dev/ae.safetensors",
    "models/FLUX/FLUX.1-dev/flux1-dev.safetensors"
])

# Registering OmostPromter as a prompt extender makes the pipeline call it
# before encoding the prompt.
pipe = FluxImagePipeline.from_model_manager(model_manager, prompt_extender_classes=[OmostPromter])

# Fix the seed for a reproducible image.
torch.manual_seed(0)
image = pipe(
    prompt="an image of a witch who is releasing ice and fire magic",
    num_inference_steps=30, embedded_guidance=3.5
)
image.save("image_omost.jpg")
||||
Reference in New Issue
Block a user