Skip to content

Commit 3d717af

Browse files
committed
feat(winml): add WinML hub catalog and per-model precision configs
1 parent f7092a2 commit 3d717af

193 files changed

Lines changed: 13140 additions & 0 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.aitk/configs/wmk_hub_catalog.json

Lines changed: 344 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,344 @@
1+
[
2+
{
3+
"model_id": "BAAI/bge-base-en-v1.5",
4+
"description": "General-purpose English text embedding model (110M params) for retrieval and ranking.",
5+
"model_type": "bert",
6+
"task": "feature-extraction"
7+
},
8+
{
9+
"model_id": "BAAI/bge-base-en-v1.5",
10+
"description": "General-purpose English embedding model optimized for semantic similarity.",
11+
"model_type": "bert",
12+
"task": "sentence-similarity"
13+
},
14+
{
15+
"model_id": "BAAI/bge-large-en-v1.5",
16+
"description": "High-capacity English text embedding model (335M params) for semantic similarity.",
17+
"model_type": "bert",
18+
"task": "sentence-similarity"
19+
},
20+
{
21+
"model_id": "BAAI/bge-small-en-v1.5",
22+
"description": "Compact English text embedding model (33M params) for retrieval and ranking.",
23+
"model_type": "bert",
24+
"task": "feature-extraction"
25+
},
26+
{
27+
"model_id": "BAAI/bge-small-en-v1.5",
28+
"description": "Compact English embedding model optimized for semantic similarity and matching.",
29+
"model_type": "bert",
30+
"task": "sentence-similarity"
31+
},
32+
{
33+
"model_id": "Babelscape/wikineural-multilingual-ner",
34+
"description": "Multilingual BERT model for named entity recognition across 9 languages.",
35+
"model_type": "bert",
36+
"task": "token-classification"
37+
},
38+
{
39+
"model_id": "FacebookAI/roberta-base",
40+
"description": "RoBERTa base — robustly optimized BERT pretraining for English masked language modeling.",
41+
"model_type": "roberta",
42+
"task": "fill-mask"
43+
},
44+
{
45+
"model_id": "FacebookAI/roberta-large",
46+
"description": "RoBERTa large (24 layers, 355M params) for English masked language modeling.",
47+
"model_type": "roberta",
48+
"task": "fill-mask"
49+
},
50+
{
51+
"model_id": "FacebookAI/xlm-roberta-base",
52+
"description": "Multilingual RoBERTa base trained on 100 languages for masked language modeling.",
53+
"model_type": "xlm-roberta",
54+
"task": "fill-mask"
55+
},
56+
{
57+
"model_id": "FacebookAI/xlm-roberta-large",
58+
"description": "Multilingual RoBERTa large (550M params) for cross-lingual masked language modeling.",
59+
"model_type": "xlm-roberta",
60+
"task": "fill-mask"
61+
},
62+
{
63+
"model_id": "Intel/bert-base-uncased-mrpc",
64+
"description": "BERT-base fine-tuned on MRPC paraphrase corpus for text embedding extraction.",
65+
"model_type": "bert",
66+
"task": "feature-extraction"
67+
},
68+
{
69+
"model_id": "Intel/bert-base-uncased-mrpc",
70+
"description": "BERT-base fine-tuned on MRPC for paraphrase detection and text classification.",
71+
"model_type": "bert",
72+
"task": "text-classification"
73+
},
74+
{
75+
"model_id": "ProsusAI/finbert",
76+
"description": "Financial sentiment analysis model built on BERT, classifying text as positive, negative, or neutral.",
77+
"model_type": "bert",
78+
"task": "text-classification"
79+
},
80+
{
81+
"model_id": "StanfordAIMI/dinov2-base-xray-224",
82+
"description": "DINOv2 base fine-tuned on chest X-rays for medical image feature extraction.",
83+
"model_type": "dinov2",
84+
"task": "image-feature-extraction"
85+
},
86+
{
87+
"model_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
88+
"description": "RoBERTa model fine-tuned on ~124M tweets for sentiment analysis (positive/negative/neutral).",
89+
"model_type": "roberta",
90+
"task": "text-classification"
91+
},
92+
{
93+
"model_id": "dbmdz/bert-large-cased-finetuned-conll03-english",
94+
"description": "BERT-large model fine-tuned on CoNLL-2003 for English named entity recognition.",
95+
"model_type": "bert",
96+
"task": "token-classification"
97+
},
98+
{
99+
"model_id": "deepset/bert-large-uncased-whole-word-masking-squad2",
100+
"description": "BERT-large with whole word masking, fine-tuned on SQuAD 2.0 for question answering.",
101+
"model_type": "bert",
102+
"task": "question-answering"
103+
},
104+
{
105+
"model_id": "deepset/roberta-base-squad2",
106+
"description": "RoBERTa-base fine-tuned on SQuAD 2.0 for extractive question answering.",
107+
"model_type": "roberta",
108+
"task": "question-answering"
109+
},
110+
{
111+
"model_id": "deepset/tinyroberta-squad2",
112+
"description": "Compact RoBERTa model fine-tuned on SQuAD 2.0 for lightweight question answering.",
113+
"model_type": "roberta",
114+
"task": "question-answering"
115+
},
116+
{
117+
"model_id": "dslim/bert-base-NER",
118+
"description": "BERT model fine-tuned on CoNLL-2003 for named entity recognition (PER, ORG, LOC, MISC).",
119+
"model_type": "bert",
120+
"task": "token-classification"
121+
},
122+
{
123+
"model_id": "facebook/convnext-tiny-224",
124+
"description": "ConvNeXt-Tiny model combining CNN efficiency with Transformer-era design for image classification.",
125+
"model_type": "convnext",
126+
"task": "image-classification"
127+
},
128+
{
129+
"model_id": "facebook/dino-vitb16",
130+
"description": "Vision Transformer base (ViT-B/16) self-supervised with DINO for image feature extraction.",
131+
"model_type": "vit",
132+
"task": "image-feature-extraction"
133+
},
134+
{
135+
"model_id": "facebook/dino-vits16",
136+
"description": "Vision Transformer small (ViT-S/16) self-supervised with DINO for image feature extraction.",
137+
"model_type": "vit",
138+
"task": "image-feature-extraction"
139+
},
140+
{
141+
"model_id": "facebook/dinov2-base",
142+
"description": "DINOv2 base self-supervised vision model for general-purpose image feature extraction.",
143+
"model_type": "dinov2",
144+
"task": "image-feature-extraction"
145+
},
146+
{
147+
"model_id": "facebook/dinov2-large",
148+
"description": "DINOv2 large (300M params) self-supervised vision model for image feature extraction.",
149+
"model_type": "dinov2",
150+
"task": "image-feature-extraction"
151+
},
152+
{
153+
"model_id": "facebook/dinov2-small",
154+
"description": "DINOv2 small self-supervised vision model for efficient image feature extraction.",
155+
"model_type": "dinov2",
156+
"task": "image-feature-extraction"
157+
},
158+
{
159+
"model_id": "google-bert/bert-base-multilingual-cased",
160+
"description": "Multilingual BERT (104 languages) for general-purpose text embeddings.",
161+
"model_type": "bert",
162+
"task": "feature-extraction"
163+
},
164+
{
165+
"model_id": "google-bert/bert-base-multilingual-uncased",
166+
"description": "Multilingual BERT base (uncased) pretrained on 102 languages for masked language modeling.",
167+
"model_type": "bert",
168+
"task": "fill-mask"
169+
},
170+
{
171+
"model_id": "google-bert/bert-base-uncased",
172+
"description": "BERT base uncased pretrained on English text for masked language modeling.",
173+
"model_type": "bert",
174+
"task": "fill-mask"
175+
},
176+
{
177+
"model_id": "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad",
178+
"description": "BERT-large with whole word masking, fine-tuned on SQuAD for question answering.",
179+
"model_type": "bert",
180+
"task": "question-answering"
181+
},
182+
{
183+
"model_id": "google/vit-base-patch16-224",
184+
"description": "Vision Transformer (ViT) pre-trained on ImageNet-21k, fine-tuned on ImageNet-1k at 224x224.",
185+
"model_type": "vit",
186+
"task": "image-classification"
187+
},
188+
{
189+
"model_id": "google/vit-base-patch16-224-in21k",
190+
"description": "Vision Transformer base pretrained on ImageNet-21k for image feature extraction.",
191+
"model_type": "vit",
192+
"task": "image-feature-extraction"
193+
},
194+
{
195+
"model_id": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
196+
"description": "LAION CLIP ViT-B/32 trained on 2B image-text pairs for joint image/text feature extraction.",
197+
"model_type": "clip",
198+
"task": "feature-extraction"
199+
},
200+
{
201+
"model_id": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
202+
"description": "LAION CLIP ViT-B/32 for zero-shot image classification via image-text similarity.",
203+
"model_type": "clip",
204+
"task": "zero-shot-image-classification"
205+
},
206+
{
207+
"model_id": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
208+
"description": "LAION CLIP ViT-H/14 large model for high-accuracy zero-shot image classification.",
209+
"model_type": "clip",
210+
"task": "zero-shot-image-classification"
211+
},
212+
{
213+
"model_id": "mattmdjaga/segformer_b2_clothes",
214+
"description": "SegFormer-B2 fine-tuned for clothing segmentation in fashion images.",
215+
"model_type": "segformer",
216+
"task": "image-segmentation"
217+
},
218+
{
219+
"model_id": "microsoft/rad-dino",
220+
"description": "DINOv2-based vision model fine-tuned on chest X-rays for radiology feature extraction.",
221+
"model_type": "dinov2",
222+
"task": "image-feature-extraction"
223+
},
224+
{
225+
"model_id": "microsoft/resnet-50",
226+
"description": "Classic ResNet-50 model pre-trained on ImageNet-1k for image classification.",
227+
"model_type": "resnet",
228+
"task": "image-classification"
229+
},
230+
{
231+
"model_id": "microsoft/swin-large-patch4-window7-224",
232+
"description": "Swin Transformer large model for image classification at 224x224 resolution.",
233+
"model_type": "swin",
234+
"task": "image-classification"
235+
},
236+
{
237+
"model_id": "microsoft/table-transformer-detection",
238+
"description": "DETR-based model for detecting tables in document images.",
239+
"model_type": "table-transformer",
240+
"task": "object-detection"
241+
},
242+
{
243+
"model_id": "nvidia/segformer-b1-finetuned-ade-512-512",
244+
"description": "SegFormer-B1 fine-tuned on ADE20K for semantic segmentation at 512x512.",
245+
"model_type": "segformer",
246+
"task": "image-segmentation"
247+
},
248+
{
249+
"model_id": "nvidia/segformer-b2-finetuned-ade-512-512",
250+
"description": "SegFormer-B2 fine-tuned on ADE20K for semantic segmentation at 512x512.",
251+
"model_type": "segformer",
252+
"task": "image-segmentation"
253+
},
254+
{
255+
"model_id": "nvidia/segformer-b5-finetuned-ade-640-640",
256+
"description": "SegFormer-B5 (largest variant) fine-tuned on ADE20K for semantic segmentation at 640x640.",
257+
"model_type": "segformer",
258+
"task": "image-segmentation"
259+
},
260+
{
261+
"model_id": "openai/clip-vit-base-patch16",
262+
"description": "CLIP ViT-B/16 model for joint image-text embeddings with 16x16 patch size.",
263+
"model_type": "clip",
264+
"task": "feature-extraction"
265+
},
266+
{
267+
"model_id": "openai/clip-vit-base-patch16",
268+
"description": "OpenAI CLIP ViT-B/16 for zero-shot image classification via image-text similarity.",
269+
"model_type": "clip",
270+
"task": "zero-shot-image-classification"
271+
},
272+
{
273+
"model_id": "openai/clip-vit-base-patch32",
274+
"description": "CLIP ViT-B/32 model for joint image-text embeddings with 32x32 patch size.",
275+
"model_type": "clip",
276+
"task": "feature-extraction"
277+
},
278+
{
279+
"model_id": "openai/clip-vit-base-patch32",
280+
"description": "OpenAI CLIP ViT-B/32 for zero-shot image classification via image-text similarity.",
281+
"model_type": "clip",
282+
"task": "zero-shot-image-classification"
283+
},
284+
{
285+
"model_id": "openai/clip-vit-large-patch14",
286+
"description": "OpenAI CLIP ViT-L/14 for high-accuracy zero-shot image classification.",
287+
"model_type": "clip",
288+
"task": "zero-shot-image-classification"
289+
},
290+
{
291+
"model_id": "openai/clip-vit-large-patch14-336",
292+
"description": "OpenAI CLIP ViT-L/14 at 336px resolution for higher-accuracy zero-shot image classification.",
293+
"model_type": "clip",
294+
"task": "zero-shot-image-classification"
295+
},
296+
{
297+
"model_id": "patrickjohncyh/fashion-clip",
298+
"description": "CLIP fine-tuned on fashion product images for fashion-specific zero-shot classification.",
299+
"model_type": "clip",
300+
"task": "zero-shot-image-classification"
301+
},
302+
{
303+
"model_id": "rizvandwiki/gender-classification",
304+
"description": "ViT model fine-tuned for gender classification from facial images.",
305+
"model_type": "vit",
306+
"task": "image-classification"
307+
},
308+
{
309+
"model_id": "sentence-transformers/all-MiniLM-L6-v2",
310+
"description": "Lightweight sentence embedding model mapping text to 384-dim dense vectors.",
311+
"model_type": "bert",
312+
"task": "feature-extraction"
313+
},
314+
{
315+
"model_id": "sentence-transformers/all-MiniLM-L6-v2",
316+
"description": "Lightweight sentence embedding model optimized for semantic similarity tasks.",
317+
"model_type": "bert",
318+
"task": "sentence-similarity"
319+
},
320+
{
321+
"model_id": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
322+
"description": "Multilingual sentence embedding model supporting 50+ languages, 384-dim output.",
323+
"model_type": "bert",
324+
"task": "feature-extraction"
325+
},
326+
{
327+
"model_id": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
328+
"description": "Multilingual model for cross-lingual semantic similarity across 50+ languages.",
329+
"model_type": "bert",
330+
"task": "sentence-similarity"
331+
},
332+
{
333+
"model_id": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
334+
"description": "Multilingual sentence embedding model (XLM-RoBERTa backbone distilled from paraphrase-mpnet-base-v2) for high-quality semantic similarity across 50+ languages.",
335+
"model_type": "xlm-roberta",
336+
"task": "sentence-similarity"
337+
},
338+
{
339+
"model_id": "w11wo/indonesian-roberta-base-posp-tagger",
340+
"description": "RoBERTa model fine-tuned for Indonesian part-of-speech tagging.",
341+
"model_type": "roberta",
342+
"task": "token-classification"
343+
}
344+
]

0 commit comments

Comments
 (0)