add new quality metric

This commit is contained in:
YunhongLu-ZJU
2025-02-17 14:42:20 +08:00
parent 77d0f4d297
commit 991ba162bd
69 changed files with 88 additions and 1461 deletions

View File

@@ -1,22 +0,0 @@
{
"embed_dim": 512,
"quick_gelu": true,
"vision_cfg": {
"image_size": 224,
"layers": [
3,
4,
23,
3
],
"width": 64,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,21 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": [
3,
4,
23,
3
],
"width": 64,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,22 +0,0 @@
{
"embed_dim": 1024,
"quick_gelu": true,
"vision_cfg": {
"image_size": 224,
"layers": [
3,
4,
6,
3
],
"width": 64,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,21 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 224,
"layers": [
3,
4,
6,
3
],
"width": 64,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,21 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 384,
"layers": [
6,
8,
18,
8
],
"width": 96,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,21 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"image_size": 288,
"layers": [
4,
6,
10,
6
],
"width": 80,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,21 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 448,
"layers": [
3,
15,
36,
10
],
"width": 128,
"patch_size": null
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"image_size": 240,
"layers": 12,
"width": 896,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 896,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"image_size": 256,
"layers": 12,
"width": 896,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 512,
"quick_gelu": true,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 224,
"layers": 32,
"width": 1280,
"head_width": 80,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 24
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 280,
"layers": 24,
"width": 1024,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 336,
"layers": 24,
"width": 1024,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 224,
"layers": 24,
"width": 1024,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 320,
"layers": 24,
"width": 1024,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 224,
"layers": 24,
"width": 1024,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 384,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 512,
"patch_size": 16,
"ls_init_value": 1e-4
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 384,
"heads": 6,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 512,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 384,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 512,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 384,
"heads": 6,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 512,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 256,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 384,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 256,
"heads": 4,
"layers": 10
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 384,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 384,
"patch_size": 16
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 384,
"heads": 6,
"layers": 12
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 256,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 384,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 256,
"heads": 4,
"layers": 10
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 384,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 384,
"patch_size": 32
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 384,
"heads": 6,
"layers": 12
}
}

View File

@@ -1,18 +0,0 @@
{
"embed_dim": 1280,
"vision_cfg": {
"image_size": 224,
"layers": 48,
"width": 1664,
"head_width": 104,
"mlp_ratio": 4.9231,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1280,
"heads": 20,
"layers": 32
}
}

View File

@@ -1,18 +0,0 @@
{
"embed_dim": 1280,
"vision_cfg": {
"image_size": 224,
"layers": 56,
"width": 1792,
"head_width": 112,
"mlp_ratio": 8.5715,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1280,
"heads": 20,
"layers": 36
}
}

View File

@@ -1,18 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 224,
"layers": 40,
"width": 1408,
"head_width": 88,
"mlp_ratio": 4.3637,
"patch_size": 14
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 24
}
}

View File

@@ -1,30 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32,
"attentional_pool": true,
"attn_pooler_heads": 8,
"output_tokens": true
},
"text_cfg": {
"context_length": 76,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12,
"embed_cls": true,
"output_tokens": true
},
"multimodal_cfg": {
"context_length": 76,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12,
"attn_pooler_heads": 8
},
"custom_text": true
}

View File

@@ -1,30 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"image_size": 224,
"layers": 24,
"width": 1024,
"patch_size": 14,
"attentional_pool": true,
"attn_pooler_heads": 8,
"output_tokens": true
},
"text_cfg": {
"context_length": 76,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12,
"embed_cls": true,
"output_tokens": true
},
"multimodal_cfg": {
"context_length": 76,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12,
"attn_pooler_heads": 12
},
"custom_text": true
}

View File

@@ -1,31 +0,0 @@
{
"embed_dim": 512,
"multimodal_cfg": {
"width": 768,
"context_length": 76,
"vocab_size": 64000,
"mlp_ratio": 4,
"layers": 12,
"dim_head": 64,
"heads": 12,
"n_queries": 256,
"attn_pooler_heads": 8
},
"vision_cfg": {
"image_size": 288,
"layers": 12,
"width": 768,
"patch_size": 18,
"output_tokens": true
},
"text_cfg": {
"context_length": 76,
"vocab_size": 64000,
"layers": 12,
"heads": 12,
"width": 768,
"embed_cls": true,
"output_tokens": true
},
"custom_text": true
}

View File

@@ -1,24 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32,
"output_tokens": true
},
"text_cfg": {
"hf_model_name": "roberta-base",
"hf_tokenizer_name": "roberta-base",
"proj": "linear",
"width": 768,
"output_tokens": true
},
"multimodal_cfg": {
"context_length": 76,
"width": 768,
"heads": 8,
"layers": 12
},
"custom_text": true
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"timm_model_name": "convnext_base",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"timm_model_name": "convnext_base",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 256
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"timm_model_name": "convnext_base",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 320
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"timm_model_name": "convnext_large",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"timm_model_name": "convnext_large",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "mlp",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 256
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 16
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 768,
"vision_cfg": {
"timm_model_name": "convnext_large",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "mlp",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 320
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 768,
"heads": 12,
"layers": 16
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"timm_model_name": "convnext_small",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"timm_model_name": "convnext_tiny",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"timm_model_name": "convnext_xlarge",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 256
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 20
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"timm_model_name": "convnext_xxlarge",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 256
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 24
}
}

View File

@@ -1,19 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"timm_model_name": "convnext_xxlarge",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"timm_drop": 0.0,
"timm_drop_path": 0.1,
"image_size": 320
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 1024,
"heads": 16,
"layers": 24
}
}

View File

@@ -1,15 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"hf_model_name": "google/mt5-base",
"hf_tokenizer_name": "google/mt5-base",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 224,
"layers": 32,
"width": 1280,
"head_width": 80,
"patch_size": 14
},
"text_cfg": {
"hf_model_name": "google/mt5-xl",
"hf_tokenizer_name": "google/mt5-xl",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 512,
"quick_gelu": true,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"hf_model_name": "roberta-base",
"hf_tokenizer_name": "roberta-base",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 640,
"vision_cfg": {
"timm_model_name": "swin_base_patch4_window7_224",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"timm_model_name": "vit_medium_patch16_gap_256",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"image_size": 256
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,17 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"timm_model_name": "vit_relpos_medium_patch16_cls_224",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}

View File

@@ -1,15 +0,0 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"hf_model_name": "xlm-roberta-base",
"hf_tokenizer_name": "xlm-roberta-base",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}

View File

@@ -1,16 +0,0 @@
{
"embed_dim": 1024,
"vision_cfg": {
"image_size": 224,
"layers": 32,
"width": 1280,
"head_width": 80,
"patch_size": 14
},
"text_cfg": {
"hf_model_name": "xlm-roberta-large",
"hf_tokenizer_name": "xlm-roberta-large",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}

View File

@@ -19,7 +19,10 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
@lru_cache()
def default_bpe():
    """Return the absolute path of the BPE vocabulary archive.

    The file ``bpe_simple_vocab_16e6.txt.gz`` is looked up under
    ``models/QualityMetric`` inside the directory four levels above this
    module (treated here as the project root), rather than next to this
    module. The path is only computed, not checked for existence.

    The result is memoized by ``lru_cache``, so the path is built once per
    process.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Walk four levels up from this module's directory to the project root.
    project_root = os.path.abspath(os.path.join(current_dir, '../../../../'))
    quality_metric_path = os.path.join(project_root, 'models', 'QualityMetric')
    return os.path.join(quality_metric_path, "bpe_simple_vocab_16e6.txt.gz")
@lru_cache()