@@ -25,7 +25,7 @@ namespace embree
2525 typedef float Scalar;
2626 enum { N = 2 };
2727 union {
28- __m128 v ;
28+ __m128 m128 ;
2929 struct { float x,y,az,aw; };
3030 };
3131
@@ -34,25 +34,23 @@ namespace embree
3434 // //////////////////////////////////////////////////////////////////////////////
3535
3636 __forceinline Vec2fa ( ) {}
37- __forceinline Vec2fa ( const __m128 a ) : v (a) {}
37+ __forceinline Vec2fa ( const __m128 a ) : m128 (a) {}
3838
3939 __forceinline Vec2fa ( const Vec2<float >& other ) { x = other.x ; y = other.y ; }
4040 __forceinline Vec2fa& operator =( const Vec2<float >& other ) { x = other.x ; y = other.y ; return *this ; }
4141
42- __forceinline Vec2fa ( const Vec2fa& other ) { v = other.v ; }
43- __forceinline Vec2fa& operator =( const Vec2fa& other ) { v = other.v ; return *this ; }
42+ __forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128 ; }
43+ __forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128 ; return *this ; }
4444
45- __forceinline explicit Vec2fa ( const float a ) : v (_mm_set1_ps (a)) {}
46- __forceinline Vec2fa ( const float x, const float y) : v (_mm_set_ps (y, y, y, x)) {}
45+ __forceinline explicit Vec2fa ( const float a ) : m128 (_mm_set1_ps (a)) {}
46+ __forceinline Vec2fa ( const float x, const float y) : m128 (_mm_set_ps (y, y, y, x)) {}
4747
4848#if !defined(_M_ARM64) || defined(__clang__)
49- __forceinline explicit Vec2fa ( const __m128i a ) : v (_mm_cvtepi32_ps (a)) {}
49+ __forceinline explicit Vec2fa ( const __m128i a ) : m128 (_mm_cvtepi32_ps (a)) {}
5050#endif
5151
52- __forceinline const __m128& m128 () const { return v; }
53- __forceinline __m128& m128 () { return v; }
54-
55- __forceinline operator vfloat4 () const { return vfloat4 (m128 ()); }
52+ __forceinline operator const __m128&() const { return m128; }
53+ __forceinline operator __m128&() { return m128; }
5654
5755 // //////////////////////////////////////////////////////////////////////////////
5856 // / Loads and Stores
@@ -67,17 +65,17 @@ namespace embree
6765 }
6866
6967 static __forceinline void storeu ( void * ptr, const Vec2fa& v ) {
70- _mm_storeu_ps ((float *)ptr,v. m128 () );
68+ _mm_storeu_ps ((float *)ptr,v);
7169 }
7270
7371 // //////////////////////////////////////////////////////////////////////////////
7472 // / Constants
7573 // //////////////////////////////////////////////////////////////////////////////
7674
77- __forceinline Vec2fa ( ZeroTy ) : v (_mm_setzero_ps ()) {}
78- __forceinline Vec2fa ( OneTy ) : v (_mm_set1_ps (1 .0f )) {}
79- __forceinline Vec2fa ( PosInfTy ) : v (_mm_set1_ps (pos_inf)) {}
80- __forceinline Vec2fa ( NegInfTy ) : v (_mm_set1_ps (neg_inf)) {}
75+ __forceinline Vec2fa ( ZeroTy ) : m128 (_mm_setzero_ps ()) {}
76+ __forceinline Vec2fa ( OneTy ) : m128 (_mm_set1_ps (1 .0f )) {}
77+ __forceinline Vec2fa ( PosInfTy ) : m128 (_mm_set1_ps (pos_inf)) {}
78+ __forceinline Vec2fa ( NegInfTy ) : m128 (_mm_set1_ps (neg_inf)) {}
8179
8280 // //////////////////////////////////////////////////////////////////////////////
8381 // / Array Access
@@ -94,66 +92,66 @@ namespace embree
9492 __forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
9593 __forceinline Vec2fa operator -( const Vec2fa& a ) {
9694 const __m128 mask = _mm_castsi128_ps (_mm_set1_epi32 (0x80000000 ));
97- return _mm_xor_ps (a.v , mask);
95+ return _mm_xor_ps (a.m128 , mask);
9896 }
9997 __forceinline Vec2fa abs ( const Vec2fa& a ) {
10098 const __m128 mask = _mm_castsi128_ps (_mm_set1_epi32 (0x7fffffff ));
101- return _mm_and_ps (a.v , mask);
99+ return _mm_and_ps (a.m128 , mask);
102100 }
103101 __forceinline Vec2fa sign ( const Vec2fa& a ) {
104- return blendv_ps (Vec2fa (one). m128 (), ( -Vec2fa (one)). m128 () , _mm_cmplt_ps (a. m128 () ,Vec2fa (zero). m128 ( )));
102+ return blendv_ps (Vec2fa (one), -Vec2fa (one), _mm_cmplt_ps (a,Vec2fa (zero)));
105103 }
106104
107105 __forceinline Vec2fa rcp ( const Vec2fa& a )
108106 {
109107#if defined(__aarch64__) || defined(_M_ARM64)
110- __m128 reciprocal = _mm_rcp_ps (a.v );
111- reciprocal = vmulq_f32 (vrecpsq_f32 (a.v , reciprocal), reciprocal);
112- reciprocal = vmulq_f32 (vrecpsq_f32 (a.v , reciprocal), reciprocal);
108+ __m128 reciprocal = _mm_rcp_ps (a.m128 );
109+ reciprocal = vmulq_f32 (vrecpsq_f32 (a.m128 , reciprocal), reciprocal);
110+ reciprocal = vmulq_f32 (vrecpsq_f32 (a.m128 , reciprocal), reciprocal);
113111 return (const Vec2fa)reciprocal;
114112#else
115113#if defined(__AVX512VL__)
116- const Vec2fa r = _mm_rcp14_ps (a.v );
114+ const Vec2fa r = _mm_rcp14_ps (a.m128 );
117115#else
118- const Vec2fa r = _mm_rcp_ps (a.v );
116+ const Vec2fa r = _mm_rcp_ps (a.m128 );
119117#endif
120118
121119#if defined(__AVX2__)
122- const Vec2fa h_n = _mm_fnmadd_ps (a. m128 () , r. m128 () , vfloat4 (1.0 ). m128 ( )); // First, compute 1 - a * r (which will be very close to 0)
123- const Vec2fa res = _mm_fmadd_ps (r. m128 () , h_n. m128 () , r. m128 () ); // Then compute r + r * h_n
120+ const Vec2fa h_n = _mm_fnmadd_ps (a, r, vfloat4 (1.0 )); // First, compute 1 - a * r (which will be very close to 0)
121+ const Vec2fa res = _mm_fmadd_ps (r, h_n, r); // Then compute r + r * h_n
124122#else
125- const Vec2fa h_n = _mm_sub_ps (vfloat4 (1 .0f ). m128 () , _mm_mul_ps (a. m128 () , r. m128 () )); // First, compute 1 - a * r (which will be very close to 0)
126- const Vec2fa res = _mm_add_ps (r. m128 () ,_mm_mul_ps (r. m128 () , h_n. m128 () )); // Then compute r + r * h_n
123+ const Vec2fa h_n = _mm_sub_ps (vfloat4 (1 .0f ), _mm_mul_ps (a, r)); // First, compute 1 - a * r (which will be very close to 0)
124+ const Vec2fa res = _mm_add_ps (r,_mm_mul_ps (r, h_n)); // Then compute r + r * h_n
127125#endif
128126
129127 return res;
130128#endif // defined(__aarch64__) || defined(_M_ARM64)
131129 }
132130
133- __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps (a.v ); }
134- __forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps (a. m128 (),a. m128 () ); }
131+ __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps (a.m128 ); }
132+ __forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps (a,a ); }
135133
136134 __forceinline Vec2fa rsqrt ( const Vec2fa& a )
137135 {
138136#if defined(__aarch64__) || defined(_M_ARM64)
139- __m128 r = _mm_rsqrt_ps (a.v );
140- r = vmulq_f32 (r, vrsqrtsq_f32 (vmulq_f32 (a.v , r), r));
141- r = vmulq_f32 (r, vrsqrtsq_f32 (vmulq_f32 (a.v , r), r));
137+ __m128 r = _mm_rsqrt_ps (a.m128 );
138+ r = vmulq_f32 (r, vrsqrtsq_f32 (vmulq_f32 (a.m128 , r), r));
139+ r = vmulq_f32 (r, vrsqrtsq_f32 (vmulq_f32 (a.m128 , r), r));
142140 return r;
143141#else
144142
145143#if defined(__AVX512VL__)
146- __m128 r = _mm_rsqrt14_ps (a.v );
144+ __m128 r = _mm_rsqrt14_ps (a.m128 );
147145#else
148- __m128 r = _mm_rsqrt_ps (a.v );
146+ __m128 r = _mm_rsqrt_ps (a.m128 );
149147#endif
150- return _mm_add_ps (_mm_mul_ps (_mm_set1_ps (1 .5f ),r), _mm_mul_ps (_mm_mul_ps (_mm_mul_ps (a. m128 () , _mm_set1_ps (-0 .5f )), r), _mm_mul_ps (r, r)));
148+ return _mm_add_ps (_mm_mul_ps (_mm_set1_ps (1 .5f ),r), _mm_mul_ps (_mm_mul_ps (_mm_mul_ps (a, _mm_set1_ps (-0 .5f )), r), _mm_mul_ps (r, r)));
151149
152150#endif
153151 }
154152
155153 __forceinline Vec2fa zero_fix (const Vec2fa& a) {
156- return blendv_ps (a. m128 () , _mm_set1_ps (min_rcp_input), _mm_cmplt_ps (abs (a).v , _mm_set1_ps (min_rcp_input)));
154+ return blendv_ps (a, _mm_set1_ps (min_rcp_input), _mm_cmplt_ps (abs (a).m128 , _mm_set1_ps (min_rcp_input)));
157155 }
158156 __forceinline Vec2fa rcp_safe (const Vec2fa& a) {
159157 return rcp (zero_fix (a));
@@ -170,33 +168,33 @@ namespace embree
170168 // / Binary Operators
171169 // //////////////////////////////////////////////////////////////////////////////
172170
173- __forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps (a.v , b.v ); }
174- __forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps (a.v , b.v ); }
175- __forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps (a.v , b.v ); }
171+ __forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps (a.m128 , b.m128 ); }
172+ __forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps (a.m128 , b.m128 ); }
173+ __forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps (a.m128 , b.m128 ); }
176174 __forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa (b); }
177175 __forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa (a) * b; }
178- __forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps (a.v ,b.v ); }
179- __forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps (a.v ,_mm_set1_ps (b)); }
180- __forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps (_mm_set1_ps (a),b.v ); }
176+ __forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps (a.m128 ,b.m128 ); }
177+ __forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps (a.m128 ,_mm_set1_ps (b)); }
178+ __forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps (_mm_set1_ps (a),b.m128 ); }
181179
182- __forceinline Vec2fa min ( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps (a.v ,b.v ); }
183- __forceinline Vec2fa max ( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps (a.v ,b.v ); }
180+ __forceinline Vec2fa min ( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps (a.m128 ,b.m128 ); }
181+ __forceinline Vec2fa max ( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps (a.m128 ,b.m128 ); }
184182
185183#if defined(__aarch64__) || defined(_M_ARM64) || defined(__SSE4_1__)
186184 __forceinline Vec2fa mini (const Vec2fa& a, const Vec2fa& b) {
187- const vint4 ai = _mm_castps_si128 (a. m128 () );
188- const vint4 bi = _mm_castps_si128 (b. m128 () );
189- const vint4 ci = _mm_min_epi32 (ai. m128i () ,bi. m128i () );
190- return _mm_castsi128_ps (ci. m128i () );
185+ const vint4 ai = _mm_castps_si128 (a);
186+ const vint4 bi = _mm_castps_si128 (b);
187+ const vint4 ci = _mm_min_epi32 (ai,bi);
188+ return _mm_castsi128_ps (ci);
191189 }
192190#endif
193191
194192#if defined(__aarch64__) || defined(_M_ARM64) || defined(__SSE4_1__)
195193 __forceinline Vec2fa maxi (const Vec2fa& a, const Vec2fa& b) {
196- const vint4 ai = _mm_castps_si128 (a. m128 () );
197- const vint4 bi = _mm_castps_si128 (b. m128 () );
198- const vint4 ci = _mm_max_epi32 (ai. m128i () ,bi. m128i () );
199- return _mm_castsi128_ps (ci. m128i () );
194+ const vint4 ai = _mm_castps_si128 (a);
195+ const vint4 bi = _mm_castps_si128 (b);
196+ const vint4 ci = _mm_max_epi32 (ai,bi);
197+ return _mm_castsi128_ps (ci);
200198 }
201199#endif
202200
@@ -209,10 +207,10 @@ namespace embree
209207 // //////////////////////////////////////////////////////////////////////////////
210208
211209#if defined(__AVX2__)
212- __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps (a. m128 (),b. m128 (),c. m128 () ); }
213- __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps (a. m128 (),b. m128 (),c. m128 () ); }
214- __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps (a. m128 (),b. m128 (),c. m128 () ); }
215- __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps (a. m128 (),b. m128 (),c. m128 () ); }
210+ __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps (a,b,c ); }
211+ __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps (a,b,c ); }
212+ __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps (a,b,c ); }
213+ __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps (a,b,c ); }
216214#else
217215 __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
218216 __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
@@ -249,16 +247,16 @@ namespace embree
249247 // / Comparison Operators
250248 // //////////////////////////////////////////////////////////////////////////////
251249
252- __forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps (_mm_cmpeq_ps (a.v , b.v )) & 3 ) == 3 ; }
253- __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps (_mm_cmpneq_ps (a.v , b.v )) & 3 ) != 0 ; }
250+ __forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps (_mm_cmpeq_ps (a.m128 , b.m128 )) & 3 ) == 3 ; }
251+ __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps (_mm_cmpneq_ps (a.m128 , b.m128 )) & 3 ) != 0 ; }
254252
255253 // //////////////////////////////////////////////////////////////////////////////
256254 // / Euclidean Space Operators
257255 // //////////////////////////////////////////////////////////////////////////////
258256
259257#if defined(__SSE4_1__)
260258 __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
261- return _mm_cvtss_f32 (_mm_dp_ps (a. m128 (),b. m128 () ,0x3F ));
259+ return _mm_cvtss_f32 (_mm_dp_ps (a,b ,0x3F ));
262260 }
263261#else
264262 __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
@@ -283,7 +281,7 @@ namespace embree
283281
284282 __forceinline Vec2fa select ( bool s, const Vec2fa& t, const Vec2fa& f ) {
285283 __m128 mask = s ? _mm_castsi128_ps (_mm_cmpeq_epi32 (_mm_setzero_si128 (), _mm_setzero_si128 ())) : _mm_setzero_ps ();
286- return blendv_ps (f. m128 () , t. m128 () , mask);
284+ return blendv_ps (f, t, mask);
287285 }
288286
289287 __forceinline Vec2fa lerp (const Vec2fa& v0, const Vec2fa& v1, const float t) {
@@ -303,12 +301,12 @@ namespace embree
303301
304302#if defined(__aarch64__) || defined(_M_ARM64)
305303 // __forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
306- __forceinline Vec2fa floor (const Vec2fa& a) { return vrndmq_f32 (a. v ); }
307- __forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32 (a. v ); }
304+ __forceinline Vec2fa floor (const Vec2fa& a) { return vrndmq_f32 (a); }
305+ __forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32 (a); }
308306#elif defined (__SSE4_1__)
309307 // __forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
310- __forceinline Vec2fa floor ( const Vec2fa& a ) { return _mm_round_ps (a. m128 () , _MM_FROUND_TO_NEG_INF ); }
311- __forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps (a. m128 () , _MM_FROUND_TO_POS_INF ); }
308+ __forceinline Vec2fa floor ( const Vec2fa& a ) { return _mm_round_ps (a, _MM_FROUND_TO_NEG_INF ); }
309+ __forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps (a, _MM_FROUND_TO_POS_INF ); }
312310#else
313311 // __forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
314312 __forceinline Vec2fa floor ( const Vec2fa& a ) { return Vec2fa (floorf (a.x ),floorf (a.y )); }
0 commit comments