Skip to content

O2 flag generates segmentation fault in my code #1763

Description

@Joao-Pedro-Cabral

Hello,

I'm using this function from PQClean:

void
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, inner_shake256_context *src) {
    /*
     * Initialise the PRNG state of 'p' from 56 bytes extracted from
     * the SHAKE256 context 'src', then trigger a refill.
     *
     * The extracted bytes are decoded explicitly, four at a time, as
     * little-endian 32-bit words, so that a given seed always yields
     * the same state word values.
     *
     * NOTE(review): the two casts below assume p->state.d is suitably
     * aligned (and legally accessible) as uint32_t / uint64_t; confirm
     * against the definition of the prng type.
     */
    enum { SEED_BYTES = 56, SEED_WORDS = 14 };

    uint8_t seed[SEED_BYTES];
    uint32_t *words = (uint32_t *) p->state.d;
    uint64_t *dwords = (uint64_t *) p->state.d;
    const uint8_t *in;
    uint64_t lo, hi;
    int k;

    inner_shake256_extract(src, seed, SEED_BYTES);

    /* Walk the seed in 4-byte groups; byte 0 is the least significant. */
    in = seed;
    for (k = 0; k < SEED_WORDS; k ++) {
        words[k] = (uint32_t)in[0]
                   | ((uint32_t)in[1] << 8)
                   | ((uint32_t)in[2] << 16)
                   | ((uint32_t)in[3] << 24);
        in += 4;
    }

    /*
     * The last two 32-bit words (byte offsets 48 and 52) are merged
     * into the single 64-bit word located at byte offset 48, with the
     * word from offset 52 in the upper half.
     */
    lo = words[12];
    hi = words[13];
    dwords[6] = lo + (hi << 32);

    PQCLEAN_FALCON512_CLEAN_prng_refill(p);
}

When I run my code on my x86_64 PC (with both the O2 and O0 flags), everything works fine.

However, when I compile with the O2 flag and run the code in the gem5 simulator, I get a segmentation fault.
First, I noticed that with the O2 flag the loop for (i = 0; i < 14; i ++) is auto-vectorized.
Afterwards, I used remote gdb in gem5 to debug my code, and I discovered that the segmentation fault is caused by this line:
22768: 02b70187 vlm.v v3,(a4)
This a4 comes from:
2275c: 8ec18713 addi a4,gp,-1812 # 390ec <__TMC_END__+0x4>

Additionally, I tried another program that uses the same vlm.v instruction with the same vtype and vl configuration in gem5 (but with a different memory address), and no segmentation fault happened. I did this only to check that it is not an internal gem5 error.

What do you think about this issue? If you need any more information from my setup/code, feel free to ask me (I can share anything from this code).

O0 disassembly code from this function:

000000000002f430 <PQCLEAN_FALCON512_CLEAN_prng_init>:
   2f430:	7175                	addi	sp,sp,-144
   2f432:	e506                	sd	ra,136(sp)
   2f434:	e122                	sd	s0,128(sp)
   2f436:	0900                	addi	s0,sp,144
   2f438:	f6a43c23          	sd	a0,-136(s0)
   2f43c:	f6b43823          	sd	a1,-144(s0)
   2f440:	f7843783          	ld	a5,-136(s0)
   2f444:	20878793          	addi	a5,a5,520
   2f448:	fef43023          	sd	a5,-32(s0)
   2f44c:	f7843783          	ld	a5,-136(s0)
   2f450:	20878793          	addi	a5,a5,520
   2f454:	fcf43c23          	sd	a5,-40(s0)
   2f458:	f8840793          	addi	a5,s0,-120
   2f45c:	f7043603          	ld	a2,-144(s0)
   2f460:	03800593          	li	a1,56
   2f464:	853e                	mv	a0,a5
   2f466:	963e40ef          	jal	13dc8 <shake256_inc_squeeze>
   2f46a:	fe042623          	sw	zero,-20(s0)
   2f46e:	a045                	j	2f50e <PQCLEAN_FALCON512_CLEAN_prng_init+0xde>
   2f470:	fec42783          	lw	a5,-20(s0)
   2f474:	0027979b          	slliw	a5,a5,0x2
   2f478:	2781                	sext.w	a5,a5
   2f47a:	17c1                	addi	a5,a5,-16
   2f47c:	97a2                	add	a5,a5,s0
   2f47e:	f987c783          	lbu	a5,-104(a5)
   2f482:	0007871b          	sext.w	a4,a5
   2f486:	fec42783          	lw	a5,-20(s0)
   2f48a:	0027979b          	slliw	a5,a5,0x2
   2f48e:	2781                	sext.w	a5,a5
   2f490:	2785                	addiw	a5,a5,1
   2f492:	2781                	sext.w	a5,a5
   2f494:	17c1                	addi	a5,a5,-16
   2f496:	97a2                	add	a5,a5,s0
   2f498:	f987c783          	lbu	a5,-104(a5)
   2f49c:	2781                	sext.w	a5,a5
   2f49e:	0087979b          	slliw	a5,a5,0x8
   2f4a2:	2781                	sext.w	a5,a5
   2f4a4:	8fd9                	or	a5,a5,a4
   2f4a6:	0007871b          	sext.w	a4,a5
   2f4aa:	fec42783          	lw	a5,-20(s0)
   2f4ae:	0027979b          	slliw	a5,a5,0x2
   2f4b2:	2781                	sext.w	a5,a5
   2f4b4:	2789                	addiw	a5,a5,2
   2f4b6:	2781                	sext.w	a5,a5
   2f4b8:	17c1                	addi	a5,a5,-16
   2f4ba:	97a2                	add	a5,a5,s0
   2f4bc:	f987c783          	lbu	a5,-104(a5)
   2f4c0:	2781                	sext.w	a5,a5
   2f4c2:	0107979b          	slliw	a5,a5,0x10
   2f4c6:	2781                	sext.w	a5,a5
   2f4c8:	8fd9                	or	a5,a5,a4
   2f4ca:	0007871b          	sext.w	a4,a5
   2f4ce:	fec42783          	lw	a5,-20(s0)
   2f4d2:	0027979b          	slliw	a5,a5,0x2
   2f4d6:	2781                	sext.w	a5,a5
   2f4d8:	278d                	addiw	a5,a5,3
   2f4da:	2781                	sext.w	a5,a5
   2f4dc:	17c1                	addi	a5,a5,-16
   2f4de:	97a2                	add	a5,a5,s0
   2f4e0:	f987c783          	lbu	a5,-104(a5)
   2f4e4:	2781                	sext.w	a5,a5
   2f4e6:	0187979b          	slliw	a5,a5,0x18
   2f4ea:	2781                	sext.w	a5,a5
   2f4ec:	8fd9                	or	a5,a5,a4
   2f4ee:	fcf42223          	sw	a5,-60(s0)
   2f4f2:	fec42783          	lw	a5,-20(s0)
   2f4f6:	078a                	slli	a5,a5,0x2
   2f4f8:	fe043703          	ld	a4,-32(s0)
   2f4fc:	97ba                	add	a5,a5,a4
   2f4fe:	fc442703          	lw	a4,-60(s0)
   2f502:	c398                	sw	a4,0(a5)
   2f504:	fec42783          	lw	a5,-20(s0)
   2f508:	2785                	addiw	a5,a5,1
   2f50a:	fef42623          	sw	a5,-20(s0)
   2f50e:	fec42783          	lw	a5,-20(s0)
   2f512:	0007871b          	sext.w	a4,a5
   2f516:	47b5                	li	a5,13
   2f518:	f4e7dce3          	bge	a5,a4,2f470 <PQCLEAN_FALCON512_CLEAN_prng_init+0x40>
   2f51c:	fe043783          	ld	a5,-32(s0)
   2f520:	03078793          	addi	a5,a5,48
   2f524:	439c                	lw	a5,0(a5)
   2f526:	1782                	slli	a5,a5,0x20
   2f528:	9381                	srli	a5,a5,0x20
   2f52a:	fcf43823          	sd	a5,-48(s0)
   2f52e:	fe043783          	ld	a5,-32(s0)
   2f532:	03478793          	addi	a5,a5,52
   2f536:	439c                	lw	a5,0(a5)
   2f538:	1782                	slli	a5,a5,0x20
   2f53a:	9381                	srli	a5,a5,0x20
   2f53c:	fcf43423          	sd	a5,-56(s0)
   2f540:	fc843783          	ld	a5,-56(s0)
   2f544:	02079693          	slli	a3,a5,0x20
   2f548:	fd843783          	ld	a5,-40(s0)
   2f54c:	03078793          	addi	a5,a5,48
   2f550:	fd043703          	ld	a4,-48(s0)
   2f554:	9736                	add	a4,a4,a3
   2f556:	e398                	sd	a4,0(a5)
   2f558:	f7843503          	ld	a0,-136(s0)
   2f55c:	010000ef          	jal	2f56c <PQCLEAN_FALCON512_CLEAN_prng_refill>
   2f560:	0001                	nop
   2f562:	0001                	nop
   2f564:	60aa                	ld	ra,136(sp)
   2f566:	640a                	ld	s0,128(sp)
   2f568:	6149                	addi	sp,sp,144
   2f56a:	8082                	ret

O2 disassembly code from this function:

0000000000022748 <PQCLEAN_FALCON512_CLEAN_prng_init>:
   22748:	715d                	addi	sp,sp,-80
   2274a:	e0a2                	sd	s0,64(sp)
   2274c:	862e                	mv	a2,a1
   2274e:	842a                	mv	s0,a0
   22750:	03800593          	li	a1,56
   22754:	0028                	addi	a0,sp,8
   22756:	e486                	sd	ra,72(sp)
   22758:	9c3ef0ef          	jal	1211a <shake256_inc_squeeze>
   2275c:	8ec18713          	addi	a4,gp,-1812 # 390ec <__TMC_END__+0x4>
   22760:	8ed18793          	addi	a5,gp,-1811 # 390ed <__TMC_END__+0x5>
   22764:	cc627057          	vsetivli	zero,4,e8,mf4,ta,ma
   22768:	02b70187          	vlm.v	v3,(a4)
   2276c:	02b78107          	vlm.v	v2,(a5)
   22770:	20840713          	addi	a4,s0,520
   22774:	003c                	addi	a5,sp,8
   22776:	23840513          	addi	a0,s0,568
   2277a:	00478593          	addi	a1,a5,4
   2277e:	02058087          	vle8.v	v1,(a1)
   22782:	9e303057          	vmv1r.v	v0,v3
   22786:	02078207          	vle8.v	v4,(a5)
   2278a:	00878613          	addi	a2,a5,8
   2278e:	02060407          	vle8.v	v8,(a2)
   22792:	00c78693          	addi	a3,a5,12
   22796:	5e1025d7          	vcompress.vm	v11,v1,v0
   2279a:	9e203057          	vmv1r.v	v0,v2
   2279e:	02068387          	vle8.v	v7,(a3)
   227a2:	07c1                	addi	a5,a5,16
   227a4:	5e102557          	vcompress.vm	v10,v1,v0
   227a8:	9e303057          	vmv1r.v	v0,v3
   227ac:	5e402357          	vcompress.vm	v6,v4,v0
   227b0:	9e203057          	vmv1r.v	v0,v2
   227b4:	3ab13357          	vslideup.vi	v6,v11,2
   227b8:	5e4020d7          	vcompress.vm	v1,v4,v0
   227bc:	5e802257          	vcompress.vm	v4,v8,v0
   227c0:	9e303057          	vmv1r.v	v0,v3
   227c4:	3aa130d7          	vslideup.vi	v1,v10,2
   227c8:	5e8022d7          	vcompress.vm	v5,v8,v0
   227cc:	9e203057          	vmv1r.v	v0,v2
   227d0:	5e7024d7          	vcompress.vm	v9,v7,v0
   227d4:	9e303057          	vmv1r.v	v0,v3
   227d8:	3a913257          	vslideup.vi	v4,v9,2
   227dc:	5e702457          	vcompress.vm	v8,v7,v0
   227e0:	5e1024d7          	vcompress.vm	v9,v1,v0
   227e4:	5e4023d7          	vcompress.vm	v7,v4,v0
   227e8:	3a8132d7          	vslideup.vi	v5,v8,2
   227ec:	5e602457          	vcompress.vm	v8,v6,v0
   227f0:	3a7134d7          	vslideup.vi	v9,v7,2
   227f4:	5e502557          	vcompress.vm	v10,v5,v0
   227f8:	9e203057          	vmv1r.v	v0,v2
   227fc:	3aa13457          	vslideup.vi	v8,v10,2
   22800:	5e6023d7          	vcompress.vm	v7,v6,v0
   22804:	5e502557          	vcompress.vm	v10,v5,v0
   22808:	5e102357          	vcompress.vm	v6,v1,v0
   2280c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22810:	4a9220d7          	vzext.vf4	v1,v9
   22814:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22818:	5e4024d7          	vcompress.vm	v9,v4,v0
   2281c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22820:	4a8222d7          	vzext.vf4	v5,v8
   22824:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22828:	3aa133d7          	vslideup.vi	v7,v10,2
   2282c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22830:	961830d7          	vsll.vi	v1,v1,16
   22834:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22838:	3a913357          	vslideup.vi	v6,v9,2
   2283c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22840:	965c32d7          	vsll.vi	v5,v5,24
   22844:	0cf07057          	vsetvli	zero,zero,e16,mf2,ta,ma
   22848:	4a732257          	vzext.vf2	v4,v7
   2284c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22850:	2a1280d7          	vor.vv	v1,v1,v5
   22854:	4a6223d7          	vzext.vf4	v7,v6
   22858:	0cf07057          	vsetvli	zero,zero,e16,mf2,ta,ma
   2285c:	96443257          	vsll.vi	v4,v4,8
   22860:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22864:	2a1380d7          	vor.vv	v1,v1,v7
   22868:	4a4322d7          	vzext.vf2	v5,v4
   2286c:	2a1280d7          	vor.vv	v1,v1,v5
   22870:	020760a7          	vse32.v	v1,(a4)
   22874:	0741                	addi	a4,a4,16
   22876:	00e50563          	beq	a0,a4,22880 <PQCLEAN_FALCON512_CLEAN_prng_init+0x138>
   2287a:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   2287e:	bdf5                	j	2277a <PQCLEAN_FALCON512_CLEAN_prng_init+0x32>
   22880:	03914603          	lbu	a2,57(sp)
   22884:	03a14783          	lbu	a5,58(sp)
   22888:	03d14683          	lbu	a3,61(sp)
   2288c:	03e14583          	lbu	a1,62(sp)
   22890:	03814803          	lbu	a6,56(sp)
   22894:	03b14703          	lbu	a4,59(sp)
   22898:	0107979b          	slliw	a5,a5,0x10
   2289c:	0086161b          	slliw	a2,a2,0x8
   228a0:	03c14503          	lbu	a0,60(sp)
   228a4:	8e5d                	or	a2,a2,a5
   228a6:	03f14783          	lbu	a5,63(sp)
   228aa:	0105959b          	slliw	a1,a1,0x10
   228ae:	0086969b          	slliw	a3,a3,0x8
   228b2:	01066633          	or	a2,a2,a6
   228b6:	8ecd                	or	a3,a3,a1
   228b8:	0187171b          	slliw	a4,a4,0x18
   228bc:	8ec9                	or	a3,a3,a0
   228be:	8f51                	or	a4,a4,a2
   228c0:	0187979b          	slliw	a5,a5,0x18
   228c4:	8fd5                	or	a5,a5,a3
   228c6:	1702                	slli	a4,a4,0x20
   228c8:	9301                	srli	a4,a4,0x20
   228ca:	1782                	slli	a5,a5,0x20
   228cc:	8fd9                	or	a5,a5,a4
   228ce:	22f43c23          	sd	a5,568(s0)
   228d2:	8522                	mv	a0,s0
   228d4:	969ff0ef          	jal	2223c <PQCLEAN_FALCON512_CLEAN_prng_refill>
   228d8:	60a6                	ld	ra,72(sp)
   228da:	6406                	ld	s0,64(sp)
   228dc:	6161                	addi	sp,sp,80
   228de:	8082                	ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions