Skip to content
Open
Show file tree
Hide file tree
Changes from 65 commits
Commits
Show all changes
87 commits
Select commit Hold shift + click to select a range
afa0a11
vulkan: add int8 kernels for core layers
nihui May 28, 2026
3203318
wip
nihui May 28, 2026
fd24d89
wip
nihui May 28, 2026
ef9bf88
w
nihui May 28, 2026
511d385
w
nihui May 28, 2026
9daec25
w
nihui May 28, 2026
48b2b89
doc
nihui May 28, 2026
fbf4170
Merge branch 'master' into vulkan-int-2
nihui May 29, 2026
670ab6b
w
nihui May 29, 2026
6a7cad2
wip
nihui May 29, 2026
5c1a6c7
w
nihui Jun 1, 2026
4bf0cc3
w
nihui Jun 1, 2026
e6fc454
w
nihui Jun 1, 2026
56479b6
apply code-format changes
nihui Jun 1, 2026
29fb6e1
w
nihui Jun 1, 2026
a425cb4
w
nihui Jun 1, 2026
a258b65
w
nihui Jun 1, 2026
77b622b
enable int8 gpu benchmark
nihui Jun 1, 2026
c224342
f
nihui Jun 1, 2026
8a869c6
w
nihui Jun 2, 2026
6bd20c3
w
nihui Jun 3, 2026
a2878ad
apply code-format changes
nihui Jun 3, 2026
870b3c2
w
nihui Jun 3, 2026
25fb1e1
cc
nihui Jun 3, 2026
8e1a5c1
w
nihui Jun 3, 2026
327943c
apply code-format changes
nihui Jun 3, 2026
7585fbb
w
nihui Jun 4, 2026
22f5b3a
Merge branch 'vulkan-int-2' of https://github.com/nihui/ncnn into vul…
nihui Jun 4, 2026
4615409
w
nihui Jun 4, 2026
d282bf9
w
nihui Jun 4, 2026
d18fd45
Merge branch 'master' into vulkan-int-2
nihui Jun 8, 2026
38c8ec6
w
nihui Jun 8, 2026
5b782d1
cm
nihui Jun 8, 2026
df83166
apply code-format changes
nihui Jun 8, 2026
fbc3363
w
nihui Jun 9, 2026
a348e6f
w
nihui Jun 9, 2026
5484d82
Revert "w"
nihui Jun 9, 2026
86aefff
w
nihui Jun 9, 2026
fd9a06a
w
nihui Jun 9, 2026
8f318c8
w
nihui Jun 9, 2026
794f636
w
nihui Jun 9, 2026
2394f70
w
nihui Jun 9, 2026
dbc3a7e
w
nihui Jun 9, 2026
ee43f88
apply code-format changes
nihui Jun 9, 2026
d81e39e
w
nihui Jun 9, 2026
3304f27
Merge branch 'vulkan-int-2' of https://github.com/nihui/ncnn into vul…
nihui Jun 9, 2026
9978b9f
w
nihui Jun 9, 2026
cd08386
shape hint for int8 models
nihui Jun 9, 2026
e24c32c
w
nihui Jun 10, 2026
3b7f259
apply code-format changes
nihui Jun 10, 2026
0df6bed
w
nihui Jun 10, 2026
91233f6
w
nihui Jun 10, 2026
6366e23
apply code-format changes
nihui Jun 10, 2026
3556539
w
nihui Jun 10, 2026
b851cd9
Merge branch 'vulkan-int-2' of https://github.com/nihui/ncnn into vul…
nihui Jun 10, 2026
01b7f0d
f
nihui Jun 10, 2026
c7bcfc0
f
nihui Jun 10, 2026
4b70fb0
apply code-format changes
nihui Jun 10, 2026
8ff9eb6
w
nihui Jun 10, 2026
4e501b2
w
nihui Jun 10, 2026
b1bec8a
w
nihui Jun 10, 2026
c530346
w
nihui Jun 10, 2026
642e64e
pp
nihui Jun 10, 2026
8b0dc92
w
nihui Jun 10, 2026
034f555
w
nihui Jun 10, 2026
7c0aa1b
vulkan: revert int8 winograd cm input swizzle
nihui Jun 10, 2026
3e70946
w
nihui Jun 10, 2026
4fdec30
w
nihui Jun 10, 2026
a676073
w
nihui Jun 10, 2026
5b545dd
w
nihui Jun 10, 2026
2109b5a
s
nihui Jun 10, 2026
7b4c112
w
nihui Jun 10, 2026
604a61c
w
nihui Jun 11, 2026
8c09298
w
nihui Jun 11, 2026
cd120be
w
nihui Jun 11, 2026
a9dcb3f
f
nihui Jun 11, 2026
ae1231a
apply code-format changes
nihui Jun 11, 2026
e53d306
w
nihui Jun 11, 2026
c96f47f
Merge branch 'vulkan-int-2' of https://github.com/nihui/ncnn into vul…
nihui Jun 11, 2026
9f11fe8
w
nihui Jun 11, 2026
ea98740
b
nihui Jun 11, 2026
0acc622
b
nihui Jun 11, 2026
5c7a631
w
nihui Jun 11, 2026
12dcacf
Revert "w"
nihui Jun 11, 2026
1c660ec
w
nihui Jun 11, 2026
a0f5504
w
nihui Jun 11, 2026
a6d87e8
vulkan: keep int8 convolution bias in fp32
nihui Jun 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions benchmark/benchncnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,6 @@ static ncnn::VkAllocator* g_staging_vkallocator = 0;

void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncnn::Option& opt, const char* model_param_data = NULL)
{
// Skip if int8 model name and using GPU
if (opt.use_vulkan_compute && strstr(comment, "int8") != NULL)
{
if (!model_param_data)
fprintf(stderr, "%20s skipped (int8+GPU not supported)\n", comment);
return;
}

g_blob_pool_allocator.clear();
g_workspace_pool_allocator.clear();

Expand Down
32 changes: 16 additions & 16 deletions benchmark/models/efficientnet_b0.param
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Split splitncnn_0 1 2 368 368_splitncnn_0 368_sp
Pooling GlobalAveragePool_8 1 1 368_splitncnn_1 369 -23330=4,1,32,1,1 0=1 4=1
InnerProduct Conv_9 1 1 369 370 -23330=4,1,8,1,1 0=8 1=1 2=256
Swish Mul_11 1 1 370 372 -23330=4,1,8,1,1
Convolution Conv_12 1 1 372 374 -23330=4,1,32,1,1 0=32 1=1 5=1 6=256 9=4
InnerProduct Conv_12 1 1 372 374 -23330=4,1,32,1,1 0=32 1=1 2=256 9=4
BinaryOp Mul_14 2 1 368_splitncnn_0 374 375 -23330=4,3,112,112,32 0=2
Convolution Conv_15 1 1 375 377 -23330=4,3,112,112,16 0=16 1=1 5=1 6=512
Convolution Conv_17 1 1 377 379 -23330=4,3,112,112,96 0=96 1=1 5=1 6=1536
Expand All @@ -20,7 +20,7 @@ Split splitncnn_1 1 2 385 385_splitncnn_0 385_sp
Pooling GlobalAveragePool_25 1 1 385_splitncnn_1 386 -23330=4,1,96,1,1 0=1 4=1
InnerProduct Conv_26 1 1 386 387 -23330=4,1,4,1,1 0=4 1=1 2=384
Swish Mul_28 1 1 387 389 -23330=4,1,4,1,1
Convolution Conv_29 1 1 389 391 -23330=4,1,96,1,1 0=96 1=1 5=1 6=384 9=4
InnerProduct Conv_29 1 1 389 391 -23330=4,1,96,1,1 0=96 1=1 2=384 9=4
BinaryOp Mul_31 2 1 385_splitncnn_0 391 392 -23330=4,3,56,56,96 0=2
Convolution Conv_32 1 1 392 394 -23330=4,3,56,56,24 0=24 1=1 5=1 6=2304
Split splitncnn_2 1 2 394 394_splitncnn_0 394_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
Expand All @@ -32,7 +32,7 @@ Split splitncnn_3 1 2 402 402_splitncnn_0 402_sp
Pooling GlobalAveragePool_42 1 1 402_splitncnn_1 403 -23330=4,1,144,1,1 0=1 4=1
InnerProduct Conv_43 1 1 403 404 -23330=4,1,6,1,1 0=6 1=1 2=864
Swish Mul_45 1 1 404 406 -23330=4,1,6,1,1
Convolution Conv_46 1 1 406 408 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
InnerProduct Conv_46 1 1 406 408 -23330=4,1,144,1,1 0=144 1=1 2=864 9=4
BinaryOp Mul_48 2 1 402_splitncnn_0 408 409 -23330=4,3,56,56,144 0=2
Convolution Conv_49 1 1 409 411 -23330=4,3,56,56,24 0=24 1=1 5=1 6=3456
BinaryOp Add_51 2 1 394_splitncnn_0 411 412 -23330=4,3,56,56,24
Expand All @@ -44,7 +44,7 @@ Split splitncnn_4 1 2 420 420_splitncnn_0 420_sp
Pooling GlobalAveragePool_60 1 1 420_splitncnn_1 421 -23330=4,1,144,1,1 0=1 4=1
InnerProduct Conv_61 1 1 421 422 -23330=4,1,6,1,1 0=6 1=1 2=864
Swish Mul_63 1 1 422 424 -23330=4,1,6,1,1
Convolution Conv_64 1 1 424 426 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
InnerProduct Conv_64 1 1 424 426 -23330=4,1,144,1,1 0=144 1=1 2=864 9=4
BinaryOp Mul_66 2 1 420_splitncnn_0 426 427 -23330=4,3,28,28,144 0=2
Convolution Conv_67 1 1 427 429 -23330=4,3,28,28,40 0=40 1=1 5=1 6=5760
Split splitncnn_5 1 2 429 429_splitncnn_0 429_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
Expand All @@ -56,7 +56,7 @@ Split splitncnn_6 1 2 437 437_splitncnn_0 437_sp
Pooling GlobalAveragePool_77 1 1 437_splitncnn_1 438 -23330=4,1,240,1,1 0=1 4=1
InnerProduct Conv_78 1 1 438 439 -23330=4,1,10,1,1 0=10 1=1 2=2400
Swish Mul_80 1 1 439 441 -23330=4,1,10,1,1
Convolution Conv_81 1 1 441 443 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
InnerProduct Conv_81 1 1 441 443 -23330=4,1,240,1,1 0=240 1=1 2=2400 9=4
BinaryOp Mul_83 2 1 437_splitncnn_0 443 444 -23330=4,3,28,28,240 0=2
Convolution Conv_84 1 1 444 446 -23330=4,3,28,28,40 0=40 1=1 5=1 6=9600
BinaryOp Add_86 2 1 429_splitncnn_0 446 447 -23330=4,3,28,28,40
Expand All @@ -68,7 +68,7 @@ Split splitncnn_7 1 2 455 455_splitncnn_0 455_sp
Pooling GlobalAveragePool_95 1 1 455_splitncnn_1 456 -23330=4,1,240,1,1 0=1 4=1
InnerProduct Conv_96 1 1 456 457 -23330=4,1,10,1,1 0=10 1=1 2=2400
Swish Mul_98 1 1 457 459 -23330=4,1,10,1,1
Convolution Conv_99 1 1 459 461 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
InnerProduct Conv_99 1 1 459 461 -23330=4,1,240,1,1 0=240 1=1 2=2400 9=4
BinaryOp Mul_101 2 1 455_splitncnn_0 461 462 -23330=4,3,14,14,240 0=2
Convolution Conv_102 1 1 462 464 -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
Split splitncnn_8 1 2 464 464_splitncnn_0 464_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
Expand All @@ -80,7 +80,7 @@ Split splitncnn_9 1 2 472 472_splitncnn_0 472_sp
Pooling GlobalAveragePool_112 1 1 472_splitncnn_1 473 -23330=4,1,480,1,1 0=1 4=1
InnerProduct Conv_113 1 1 473 474 -23330=4,1,20,1,1 0=20 1=1 2=9600
Swish Mul_115 1 1 474 476 -23330=4,1,20,1,1
Convolution Conv_116 1 1 476 478 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
InnerProduct Conv_116 1 1 476 478 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
BinaryOp Mul_118 2 1 472_splitncnn_0 478 479 -23330=4,3,14,14,480 0=2
Convolution Conv_119 1 1 479 481 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
BinaryOp Add_121 2 1 464_splitncnn_0 481 482 -23330=4,3,14,14,80
Expand All @@ -93,7 +93,7 @@ Split splitncnn_11 1 2 490 490_splitncnn_0 490_sp
Pooling GlobalAveragePool_130 1 1 490_splitncnn_1 491 -23330=4,1,480,1,1 0=1 4=1
InnerProduct Conv_131 1 1 491 492 -23330=4,1,20,1,1 0=20 1=1 2=9600
Swish Mul_133 1 1 492 494 -23330=4,1,20,1,1
Convolution Conv_134 1 1 494 496 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
InnerProduct Conv_134 1 1 494 496 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
BinaryOp Mul_136 2 1 490_splitncnn_0 496 497 -23330=4,3,14,14,480 0=2
Convolution Conv_137 1 1 497 499 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
BinaryOp Add_139 2 1 482_splitncnn_0 499 500 -23330=4,3,14,14,80
Expand All @@ -105,7 +105,7 @@ Split splitncnn_12 1 2 508 508_splitncnn_0 508_sp
Pooling GlobalAveragePool_148 1 1 508_splitncnn_1 509 -23330=4,1,480,1,1 0=1 4=1
InnerProduct Conv_149 1 1 509 510 -23330=4,1,20,1,1 0=20 1=1 2=9600
Swish Mul_151 1 1 510 512 -23330=4,1,20,1,1
Convolution Conv_152 1 1 512 514 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
InnerProduct Conv_152 1 1 512 514 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
BinaryOp Mul_154 2 1 508_splitncnn_0 514 515 -23330=4,3,14,14,480 0=2
Convolution Conv_155 1 1 515 517 -23330=4,3,14,14,112 0=112 1=1 5=1 6=53760
Split splitncnn_13 1 2 517 517_splitncnn_0 517_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
Expand All @@ -117,7 +117,7 @@ Split splitncnn_14 1 2 525 525_splitncnn_0 525_sp
Pooling GlobalAveragePool_165 1 1 525_splitncnn_1 526 -23330=4,1,672,1,1 0=1 4=1
InnerProduct Conv_166 1 1 526 527 -23330=4,1,28,1,1 0=28 1=1 2=18816
Swish Mul_168 1 1 527 529 -23330=4,1,28,1,1
Convolution Conv_169 1 1 529 531 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
InnerProduct Conv_169 1 1 529 531 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
BinaryOp Mul_171 2 1 525_splitncnn_0 531 532 -23330=4,3,14,14,672 0=2
Convolution Conv_172 1 1 532 534 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
BinaryOp Add_174 2 1 517_splitncnn_0 534 535 -23330=4,3,14,14,112
Expand All @@ -130,7 +130,7 @@ Split splitncnn_16 1 2 543 543_splitncnn_0 543_sp
Pooling GlobalAveragePool_183 1 1 543_splitncnn_1 544 -23330=4,1,672,1,1 0=1 4=1
InnerProduct Conv_184 1 1 544 545 -23330=4,1,28,1,1 0=28 1=1 2=18816
Swish Mul_186 1 1 545 547 -23330=4,1,28,1,1
Convolution Conv_187 1 1 547 549 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
InnerProduct Conv_187 1 1 547 549 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
BinaryOp Mul_189 2 1 543_splitncnn_0 549 550 -23330=4,3,14,14,672 0=2
Convolution Conv_190 1 1 550 552 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
BinaryOp Add_192 2 1 535_splitncnn_0 552 553 -23330=4,3,14,14,112
Expand All @@ -142,7 +142,7 @@ Split splitncnn_17 1 2 561 561_splitncnn_0 561_sp
Pooling GlobalAveragePool_201 1 1 561_splitncnn_1 562 -23330=4,1,672,1,1 0=1 4=1
InnerProduct Conv_202 1 1 562 563 -23330=4,1,28,1,1 0=28 1=1 2=18816
Swish Mul_204 1 1 563 565 -23330=4,1,28,1,1
Convolution Conv_205 1 1 565 567 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
InnerProduct Conv_205 1 1 565 567 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
BinaryOp Mul_207 2 1 561_splitncnn_0 567 568 -23330=4,3,7,7,672 0=2
Convolution Conv_208 1 1 568 570 -23330=4,3,7,7,192 0=192 1=1 5=1 6=129024
Split splitncnn_18 1 2 570 570_splitncnn_0 570_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
Expand All @@ -154,7 +154,7 @@ Split splitncnn_19 1 2 578 578_splitncnn_0 578_sp
Pooling GlobalAveragePool_218 1 1 578_splitncnn_1 579 -23330=4,1,1152,1,1 0=1 4=1
InnerProduct Conv_219 1 1 579 580 -23330=4,1,48,1,1 0=48 1=1 2=55296
Swish Mul_221 1 1 580 582 -23330=4,1,48,1,1
Convolution Conv_222 1 1 582 584 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
InnerProduct Conv_222 1 1 582 584 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
BinaryOp Mul_224 2 1 578_splitncnn_0 584 585 -23330=4,3,7,7,1152 0=2
Convolution Conv_225 1 1 585 587 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
BinaryOp Add_227 2 1 570_splitncnn_0 587 588 -23330=4,3,7,7,192
Expand All @@ -167,7 +167,7 @@ Split splitncnn_21 1 2 596 596_splitncnn_0 596_sp
Pooling GlobalAveragePool_236 1 1 596_splitncnn_1 597 -23330=4,1,1152,1,1 0=1 4=1
InnerProduct Conv_237 1 1 597 598 -23330=4,1,48,1,1 0=48 1=1 2=55296
Swish Mul_239 1 1 598 600 -23330=4,1,48,1,1
Convolution Conv_240 1 1 600 602 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
InnerProduct Conv_240 1 1 600 602 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
BinaryOp Mul_242 2 1 596_splitncnn_0 602 603 -23330=4,3,7,7,1152 0=2
Convolution Conv_243 1 1 603 605 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
BinaryOp Add_245 2 1 588_splitncnn_0 605 606 -23330=4,3,7,7,192
Expand All @@ -180,7 +180,7 @@ Split splitncnn_23 1 2 614 614_splitncnn_0 614_sp
Pooling GlobalAveragePool_254 1 1 614_splitncnn_1 615 -23330=4,1,1152,1,1 0=1 4=1
InnerProduct Conv_255 1 1 615 616 -23330=4,1,48,1,1 0=48 1=1 2=55296
Swish Mul_257 1 1 616 618 -23330=4,1,48,1,1
Convolution Conv_258 1 1 618 620 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
InnerProduct Conv_258 1 1 618 620 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
BinaryOp Mul_260 2 1 614_splitncnn_0 620 621 -23330=4,3,7,7,1152 0=2
Convolution Conv_261 1 1 621 623 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
BinaryOp Add_263 2 1 606_splitncnn_0 623 624 -23330=4,3,7,7,192
Expand All @@ -192,7 +192,7 @@ Split splitncnn_24 1 2 632 632_splitncnn_0 632_sp
Pooling GlobalAveragePool_272 1 1 632_splitncnn_1 633 -23330=4,1,1152,1,1 0=1 4=1
InnerProduct Conv_273 1 1 633 634 -23330=4,1,48,1,1 0=48 1=1 2=55296
Swish Mul_275 1 1 634 636 -23330=4,1,48,1,1
Convolution Conv_276 1 1 636 638 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
InnerProduct Conv_276 1 1 636 638 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
BinaryOp Mul_278 2 1 632_splitncnn_0 638 639 -23330=4,3,7,7,1152 0=2
Convolution Conv_279 1 1 639 641 -23330=4,3,7,7,320 0=320 1=1 5=1 6=368640
Convolution Conv_281 1 1 641 643 -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600
Expand Down
Loading
Loading