{
"input_model": {
"type": "PyTorchModel",
"model_path": "openai/whisper-large-v3-turbo",
"model_loader": "model_loader",
"model_script": "whisper_encoder_load.py",
"io_config": {
"input_names": [ "input_features" ],
"output_names": [ "k_cache_cross_0",
"v_cache_cross_0",
"k_cache_cross_1",
"v_cache_cross_1",
"k_cache_cross_2",
"v_cache_cross_2",
"k_cache_cross_3",
"v_cache_cross_3" ]
},
"dummy_inputs_func": "generate_dummy_inputs"
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "npu", "execution_providers": [ "QNNExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "quantize_data_config",
"user_script": "whisper_encoder_load.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "encoder_data_loader",
"data_path": ".\\data\\quantization_data" }
}
],
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 20 },
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "uint16",
"precision": "uint8",
"calibrate_method": "MinMax",
"quant_preprocess": true
},
"cb": {
"type": "EPContextBinaryGenerator",
"provider_options": {
"htp_performance_mode": "burst",
"htp_graph_finalization_optimization_mode": "3",
"offload_graph_io_quantization": "0",
"soc_model": "60"
}
}
},
"log_severity_level": 0,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_dir": "models/whisper_encoder_qdq_ctx"
}