mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-22 16:50:47 +00:00
add new quality metric
This commit is contained in:
Binary file not shown.
@@ -1,22 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
23,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
23,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 384,
|
||||
"layers": [
|
||||
6,
|
||||
8,
|
||||
18,
|
||||
8
|
||||
],
|
||||
"width": 96,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 288,
|
||||
"layers": [
|
||||
4,
|
||||
6,
|
||||
10,
|
||||
6
|
||||
],
|
||||
"width": 80,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 448,
|
||||
"layers": [
|
||||
3,
|
||||
15,
|
||||
36,
|
||||
10
|
||||
],
|
||||
"width": 128,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 240,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 256,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 280,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 336,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 320,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 16,
|
||||
"ls_init_value": 1e-4
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 256,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 256,
|
||||
"heads": 4,
|
||||
"layers": 10
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 256,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 256,
|
||||
"heads": 4,
|
||||
"layers": 10
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1280,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 48,
|
||||
"width": 1664,
|
||||
"head_width": 104,
|
||||
"mlp_ratio": 4.9231,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1280,
|
||||
"heads": 20,
|
||||
"layers": 32
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1280,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 56,
|
||||
"width": 1792,
|
||||
"head_width": 112,
|
||||
"mlp_ratio": 8.5715,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1280,
|
||||
"heads": 20,
|
||||
"layers": 36
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 40,
|
||||
"width": 1408,
|
||||
"head_width": 88,
|
||||
"mlp_ratio": 4.3637,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32,
|
||||
"attentional_pool": true,
|
||||
"attn_pooler_heads": 8,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12,
|
||||
"attn_pooler_heads": 8
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14,
|
||||
"attentional_pool": true,
|
||||
"attn_pooler_heads": 8,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12,
|
||||
"attn_pooler_heads": 12
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"multimodal_cfg": {
|
||||
"width": 768,
|
||||
"context_length": 76,
|
||||
"vocab_size": 64000,
|
||||
"mlp_ratio": 4,
|
||||
"layers": 12,
|
||||
"dim_head": 64,
|
||||
"heads": 12,
|
||||
"n_queries": 256,
|
||||
"attn_pooler_heads": 8
|
||||
},
|
||||
"vision_cfg": {
|
||||
"image_size": 288,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 18,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 64000,
|
||||
"layers": 12,
|
||||
"heads": 12,
|
||||
"width": 768,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "roberta-base",
|
||||
"hf_tokenizer_name": "roberta-base",
|
||||
"proj": "linear",
|
||||
"width": 768,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"width": 768,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "mlp",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 16
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "mlp",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 16
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_small",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_tiny",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 20
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xxlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xxlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "google/mt5-base",
|
||||
"hf_tokenizer_name": "google/mt5-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "google/mt5-xl",
|
||||
"hf_tokenizer_name": "google/mt5-xl",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "roberta-base",
|
||||
"hf_tokenizer_name": "roberta-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "swin_base_patch4_window7_224",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "vit_medium_patch16_gap_256",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "vit_relpos_medium_patch16_cls_224",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "xlm-roberta-base",
|
||||
"hf_tokenizer_name": "xlm-roberta-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "xlm-roberta-large",
|
||||
"hf_tokenizer_name": "xlm-roberta-large",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,10 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
@lru_cache()
|
||||
def default_bpe():
|
||||
return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
project_root = os.path.abspath(os.path.join(current_dir, '../../../../'))
|
||||
quality_metric_path = os.path.join(project_root, 'models', 'QualityMetric')
|
||||
return os.path.join(quality_metric_path, "bpe_simple_vocab_16e6.txt.gz")
|
||||
|
||||
|
||||
@lru_cache()
|
||||
|
||||
Reference in New Issue
Block a user