1- #![ allow( clippy:: needless_range_loop) ]
2- use crate :: table:: TABLE ;
3-
// Number of 64-bit words in a state array; every state in this file is typed `[u64; COLS]`.
41pub ( crate ) const COLS : usize = 8 ;
// Number of `rndq` / `rndp` iterations run by `compress` and `p`. Typed `u64` because the
// loop index is passed on directly as the `r` argument of those round functions.
5- const ROUNDS : u64 = 10 ;
62
/// Computes one output word as the XOR of eight `TABLE` lookups.
///
/// For each `i` in `0..8`, the word `x[c[i]]` is selected and the byte at bit offset
/// `8 * (7 - i)` is extracted (so `i == 0` reads the most significant byte and `i == 7`
/// the least significant one). That byte indexes row `i` of `TABLE`, and the entries are
/// XORed together.
///
/// `x` is the state, `c` lists which state word feeds each of the eight byte positions.
/// `TABLE` is imported from `crate::table`; its contents are not visible in this file.
7- #[ inline( always) ]
8- fn column ( x : & [ u64 ; COLS ] , c : [ usize ; 8 ] ) -> u64 {
9- let mut t = 0 ;
10- for i in 0 ..8 {
// Shift amount that brings byte `i` (counted from the most significant end) to the low 8 bits.
11- let sl = 8 * ( 7 - i) ;
12- let idx = ( ( x[ c[ i] ] >> sl) & 0xFF ) as usize ;
13- t ^= TABLE [ i] [ idx] ;
14- }
15- t
16- }
3+ mod soft;
174
/// Applies one `rndq` round to the state `x` and returns the new state.
///
/// Step 1: every word `x[i]` is XORed with `u64::MAX.wrapping_sub(i << 4) ^ r`, where `r`
/// is the caller-supplied round value (`compress` passes the plain round index).
/// Step 2: each of the eight output words is built by `column` from a fixed selection of
/// input words, given by the index arrays below.
///
/// NOTE(review): presumably this is one round of Groestl's Q permutation (the
/// `groestl_force_soft` cfg in this file suggests so) — confirm against the specification.
18- #[ inline( always) ]
19- fn rndq ( mut x : [ u64 ; COLS ] , r : u64 ) -> [ u64 ; COLS ] {
20- for i in 0 ..COLS {
// The method call binds tighter than `^`, so this is `(MAX - (i << 4)) ^ r`, then `^=` into `x[i]`.
21- x[ i] ^= u64:: MAX . wrapping_sub ( ( i as u64 ) << 4 ) ^ r;
22- }
// Each row starts one word later than the previous one (all indices + 1 modulo 8).
23- [
24- column ( & x, [ 1 , 3 , 5 , 7 , 0 , 2 , 4 , 6 ] ) ,
25- column ( & x, [ 2 , 4 , 6 , 0 , 1 , 3 , 5 , 7 ] ) ,
26- column ( & x, [ 3 , 5 , 7 , 1 , 2 , 4 , 6 , 0 ] ) ,
27- column ( & x, [ 4 , 6 , 0 , 2 , 3 , 5 , 7 , 1 ] ) ,
28- column ( & x, [ 5 , 7 , 1 , 3 , 4 , 6 , 0 , 2 ] ) ,
29- column ( & x, [ 6 , 0 , 2 , 4 , 5 , 7 , 1 , 3 ] ) ,
30- column ( & x, [ 7 , 1 , 3 , 5 , 6 , 0 , 2 , 4 ] ) ,
31- column ( & x, [ 0 , 2 , 4 , 6 , 7 , 1 , 3 , 5 ] ) ,
32- ]
33- }
// Backend selection. The first branch is taken when the target is neither `x86` nor
// `x86_64`, or when the `groestl_force_soft` cfg is set; it re-exports everything from
// the `soft` module. Otherwise the `avx512_gfni` module is declared and the dispatching
// `compress` / `p` wrappers further down in this block are compiled.
5+ cfg_if:: cfg_if! {
6+ if #[ cfg( any( not( any( target_arch = "x86_64" , target_arch = "x86" ) ) , groestl_force_soft) ) ] {
7+ pub ( crate ) use soft:: * ;
8+ } else {
9+ mod avx512_gfni;
3410
/// Applies one `rndp` round to the state `x` and returns the new state.
///
/// Step 1: every word `x[i]` is XORed with `(i << 60) ^ r`, where `r` is the
/// caller-supplied round value (`compress` and `p` pass the round index shifted left by 56).
/// Step 2: output word `k` is built by `column` from input words `k, k + 1, ..., k + 7`
/// (indices modulo 8), as listed in the index arrays below.
///
/// NOTE(review): presumably this is one round of Groestl's P permutation — confirm
/// against the specification.
35- #[ inline( always) ]
36- fn rndp ( mut x : [ u64 ; COLS ] , r : u64 ) -> [ u64 ; COLS ] {
37- for i in 0 ..COLS {
// `i << 60` places the word index in the top four bits of the word.
38- x[ i] ^= ( ( i as u64 ) << 60 ) ^ r;
39- }
40- [
41- column ( & x, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ) ,
42- column ( & x, [ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 0 ] ) ,
43- column ( & x, [ 2 , 3 , 4 , 5 , 6 , 7 , 0 , 1 ] ) ,
44- column ( & x, [ 3 , 4 , 5 , 6 , 7 , 0 , 1 , 2 ] ) ,
45- column ( & x, [ 4 , 5 , 6 , 7 , 0 , 1 , 2 , 3 ] ) ,
46- column ( & x, [ 5 , 6 , 7 , 0 , 1 , 2 , 3 , 4 ] ) ,
47- column ( & x, [ 6 , 7 , 0 , 1 , 2 , 3 , 4 , 5 ] ) ,
48- column ( & x, [ 7 , 0 , 1 , 2 , 3 , 4 , 5 , 6 ] ) ,
49- ]
50- }
// Declares the `cpuid_avx512_gfni` detector through the `cpufeatures` crate for the
// listed CPU features. `cpuid_avx512_gfni::get()` is what the `compress` and `p`
// wrappers in this block consult before calling into `avx512_gfni`.
11+ cpufeatures:: new!( cpuid_avx512_gfni, "avx" , "avx512f" , "avx512vbmi" , "gfni" ) ;
5112
/// Software compression of a single 64-byte `block` into the chaining state `h`.
///
/// The block is read as eight big-endian `u64` words `q`; `p` starts as `h ^ q`.
/// `q` is then run through `ROUNDS` iterations of `rndq` and `p` through `ROUNDS`
/// iterations of `rndp`, and finally `h[i] ^= q[i] ^ p[i]` for every word.
/// `h` is updated in place; nothing is returned.
52- pub ( crate ) fn compress ( h : & mut [ u64 ; COLS ] , block : & [ u8 ; 64 ] ) {
53- let mut q = [ 0u64 ; COLS ] ;
// `chunks_exact(8)` only yields 8-byte slices, so the `try_into().unwrap()` cannot fail.
54- for ( chunk, v) in block. chunks_exact ( 8 ) . zip ( q. iter_mut ( ) ) {
55- * v = u64:: from_be_bytes ( chunk. try_into ( ) . unwrap ( ) ) ;
56- }
57- let mut p = [ 0u64 ; COLS ] ;
58- for i in 0 ..COLS {
59- p[ i] = h[ i] ^ q[ i] ;
60- }
// `rndq` receives the plain round index as its round value.
61- for i in 0 ..ROUNDS {
62- q = rndq ( q, i) ;
63- }
// `rndp` receives the round index moved into the most significant byte.
64- for i in 0 ..ROUNDS {
65- p = rndp ( p, i << 56 ) ;
66- }
// Fold both permuted states back into the chaining value.
67- for i in 0 ..COLS {
68- h[ i] ^= q[ i] ^ p[ i] ;
69- }
70- }
/// Compresses a slice of 64-byte `blocks` into the chaining state `h`, choosing the
/// backend at run time: `avx512_gfni::compress` when `cpuid_avx512_gfni::get()` reports
/// the features as available, `soft::compress` otherwise.
13+ #[ inline( always) ]
14+ pub ( crate ) fn compress( h: & mut [ u64 ; COLS ] , blocks: & [ [ u8 ; 64 ] ] ) {
15+ if cpuid_avx512_gfni:: get( ) {
// NOTE(review): the call is only reached after the feature check above succeeded;
// presumably that check is the safety requirement of `avx512_gfni::compress` —
// confirm against its definition, which is not visible here.
16+ #[ allow( unsafe_code) ]
17+ unsafe { avx512_gfni:: compress( h, blocks) ; }
18+ } else {
19+ soft:: compress( h, blocks) ;
20+ }
21+ }
7122
/// Software version of `p`: returns `h` run through `ROUNDS` iterations of `rndp`,
/// XORed word by word with the original `h`.
///
/// `h` itself is only read; the result is a new array.
72- pub ( crate ) fn p ( h : & [ u64 ; COLS ] ) -> [ u64 ; COLS ] {
73- let mut p = * h;
// Same round value as in `compress`: the round index in the most significant byte.
74- for i in 0 ..ROUNDS {
75- p = rndp ( p, i << 56 ) ;
76- }
// Feed the input forward into the permuted state.
77- for i in 0 ..COLS {
78- p[ i] ^= h[ i] ;
79- }
80- p
81- }
/// Computes `p` for the state `h`, choosing the backend at run time:
/// `avx512_gfni::p` when `cpuid_avx512_gfni::get()` reports the features as available,
/// `soft::p` otherwise. Returns the resulting state array.
23+ #[ inline( always) ]
24+ pub ( crate ) fn p( h: & [ u64 ; COLS ] ) -> [ u64 ; COLS ] {
25+ if cpuid_avx512_gfni:: get( ) {
// NOTE(review): the call is only reached after the feature check above succeeded;
// presumably that check is the safety requirement of `avx512_gfni::p` — confirm
// against its definition, which is not visible here.
26+ #[ allow( unsafe_code) ]
27+ unsafe { avx512_gfni:: p( h) }
28+ } else {
29+ soft:: p( h)
30+ }
31+ }
32+ }
33+ }
0 commit comments