mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-23 17:38:10 +00:00
add new quality metric
This commit is contained in:
@@ -1,22 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
23,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
23,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": [
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
3
|
||||
],
|
||||
"width": 64,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 384,
|
||||
"layers": [
|
||||
6,
|
||||
8,
|
||||
18,
|
||||
8
|
||||
],
|
||||
"width": 96,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 288,
|
||||
"layers": [
|
||||
4,
|
||||
6,
|
||||
10,
|
||||
6
|
||||
],
|
||||
"width": 80,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 448,
|
||||
"layers": [
|
||||
3,
|
||||
15,
|
||||
36,
|
||||
10
|
||||
],
|
||||
"width": 128,
|
||||
"patch_size": null
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 240,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"image_size": 256,
|
||||
"layers": 12,
|
||||
"width": 896,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 280,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 336,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 320,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 16,
|
||||
"ls_init_value": 1e-4
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 512,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 256,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 256,
|
||||
"heads": 4,
|
||||
"layers": 10
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 16
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 256,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 256,
|
||||
"heads": 4,
|
||||
"layers": 10
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 384,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 384,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 384,
|
||||
"heads": 6,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1280,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 48,
|
||||
"width": 1664,
|
||||
"head_width": 104,
|
||||
"mlp_ratio": 4.9231,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1280,
|
||||
"heads": 20,
|
||||
"layers": 32
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1280,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 56,
|
||||
"width": 1792,
|
||||
"head_width": 112,
|
||||
"mlp_ratio": 8.5715,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1280,
|
||||
"heads": 20,
|
||||
"layers": 36
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 40,
|
||||
"width": 1408,
|
||||
"head_width": 88,
|
||||
"mlp_ratio": 4.3637,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32,
|
||||
"attentional_pool": true,
|
||||
"attn_pooler_heads": 8,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12,
|
||||
"attn_pooler_heads": 8
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"width": 1024,
|
||||
"patch_size": 14,
|
||||
"attentional_pool": true,
|
||||
"attn_pooler_heads": 8,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12,
|
||||
"attn_pooler_heads": 12
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"multimodal_cfg": {
|
||||
"width": 768,
|
||||
"context_length": 76,
|
||||
"vocab_size": 64000,
|
||||
"mlp_ratio": 4,
|
||||
"layers": 12,
|
||||
"dim_head": 64,
|
||||
"heads": 12,
|
||||
"n_queries": 256,
|
||||
"attn_pooler_heads": 8
|
||||
},
|
||||
"vision_cfg": {
|
||||
"image_size": 288,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 18,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 76,
|
||||
"vocab_size": 64000,
|
||||
"layers": 12,
|
||||
"heads": 12,
|
||||
"width": 768,
|
||||
"embed_cls": true,
|
||||
"output_tokens": true
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32,
|
||||
"output_tokens": true
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "roberta-base",
|
||||
"hf_tokenizer_name": "roberta-base",
|
||||
"proj": "linear",
|
||||
"width": 768,
|
||||
"output_tokens": true
|
||||
},
|
||||
"multimodal_cfg": {
|
||||
"context_length": 76,
|
||||
"width": 768,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
},
|
||||
"custom_text": true
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_base",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "mlp",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 16
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 768,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_large",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "mlp",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 768,
|
||||
"heads": 12,
|
||||
"layers": 16
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_small",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_tiny",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 20
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xxlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "convnext_xxlarge",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"timm_drop": 0.0,
|
||||
"timm_drop_path": 0.1,
|
||||
"image_size": 320
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 1024,
|
||||
"heads": 16,
|
||||
"layers": 24
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "google/mt5-base",
|
||||
"hf_tokenizer_name": "google/mt5-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "google/mt5-xl",
|
||||
"hf_tokenizer_name": "google/mt5-xl",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"quick_gelu": true,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "roberta-base",
|
||||
"hf_tokenizer_name": "roberta-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 640,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "swin_base_patch4_window7_224",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 640,
|
||||
"heads": 10,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "vit_medium_patch16_gap_256",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 256
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"timm_model_name": "vit_relpos_medium_patch16_cls_224",
|
||||
"timm_model_pretrained": false,
|
||||
"timm_pool": "",
|
||||
"timm_proj": "linear",
|
||||
"image_size": 224
|
||||
},
|
||||
"text_cfg": {
|
||||
"context_length": 77,
|
||||
"vocab_size": 49408,
|
||||
"width": 512,
|
||||
"heads": 8,
|
||||
"layers": 12
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"embed_dim": 512,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 12,
|
||||
"width": 768,
|
||||
"patch_size": 32
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "xlm-roberta-base",
|
||||
"hf_tokenizer_name": "xlm-roberta-base",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"embed_dim": 1024,
|
||||
"vision_cfg": {
|
||||
"image_size": 224,
|
||||
"layers": 32,
|
||||
"width": 1280,
|
||||
"head_width": 80,
|
||||
"patch_size": 14
|
||||
},
|
||||
"text_cfg": {
|
||||
"hf_model_name": "xlm-roberta-large",
|
||||
"hf_tokenizer_name": "xlm-roberta-large",
|
||||
"proj": "mlp",
|
||||
"pooler_type": "mean_pooler"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user