Skip to content

Commit 04097b8

Browse files
committed
Removed accessors in vector classes again; cast operators are now all explicit, except for one that stays implicit to avoid ambiguities.
1 parent ed04d12 commit 04097b8

52 files changed

Lines changed: 1670 additions & 1721 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

common/math/linearspace3.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ namespace embree
9696
/*! compute transposed matrix */
9797
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
9898
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
99-
return LinearSpace3<Vec3fa>(Vec3fa(rx.m128()),Vec3fa(ry.m128()),Vec3fa(rz.m128()));
99+
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
100100
}
101101
#endif
102102

common/math/vec2fa.h

Lines changed: 63 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace embree
2525
typedef float Scalar;
2626
enum { N = 2 };
2727
union {
28-
__m128 v;
28+
__m128 m128;
2929
struct { float x,y,az,aw; };
3030
};
3131

@@ -34,25 +34,23 @@ namespace embree
3434
////////////////////////////////////////////////////////////////////////////////
3535

3636
__forceinline Vec2fa( ) {}
37-
__forceinline Vec2fa( const __m128 a ) : v(a) {}
37+
__forceinline Vec2fa( const __m128 a ) : m128(a) {}
3838

3939
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
4040
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
4141

42-
__forceinline Vec2fa ( const Vec2fa& other ) { v = other.v; }
43-
__forceinline Vec2fa& operator =( const Vec2fa& other ) { v = other.v; return *this; }
42+
__forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
43+
__forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }
4444

45-
__forceinline explicit Vec2fa( const float a ) : v(_mm_set1_ps(a)) {}
46-
__forceinline Vec2fa( const float x, const float y) : v(_mm_set_ps(y, y, y, x)) {}
45+
__forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}
46+
__forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}
4747

4848
#if !defined(_M_ARM64) || defined(__clang__)
49-
__forceinline explicit Vec2fa( const __m128i a ) : v(_mm_cvtepi32_ps(a)) {}
49+
__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
5050
#endif
5151

52-
__forceinline const __m128& m128() const { return v; }
53-
__forceinline __m128& m128() { return v; }
54-
55-
__forceinline operator vfloat4() const { return vfloat4(m128()); }
52+
__forceinline operator const __m128&() const { return m128; }
53+
__forceinline operator __m128&() { return m128; }
5654

5755
////////////////////////////////////////////////////////////////////////////////
5856
/// Loads and Stores
@@ -67,17 +65,17 @@ namespace embree
6765
}
6866

6967
static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
70-
_mm_storeu_ps((float*)ptr,v.m128());
68+
_mm_storeu_ps((float*)ptr,v);
7169
}
7270

7371
////////////////////////////////////////////////////////////////////////////////
7472
/// Constants
7573
////////////////////////////////////////////////////////////////////////////////
7674

77-
__forceinline Vec2fa( ZeroTy ) : v(_mm_setzero_ps()) {}
78-
__forceinline Vec2fa( OneTy ) : v(_mm_set1_ps(1.0f)) {}
79-
__forceinline Vec2fa( PosInfTy ) : v(_mm_set1_ps(pos_inf)) {}
80-
__forceinline Vec2fa( NegInfTy ) : v(_mm_set1_ps(neg_inf)) {}
75+
__forceinline Vec2fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
76+
__forceinline Vec2fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
77+
__forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
78+
__forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
8179

8280
////////////////////////////////////////////////////////////////////////////////
8381
/// Array Access
@@ -94,66 +92,66 @@ namespace embree
9492
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
9593
__forceinline Vec2fa operator -( const Vec2fa& a ) {
9694
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
97-
return _mm_xor_ps(a.v, mask);
95+
return _mm_xor_ps(a.m128, mask);
9896
}
9997
__forceinline Vec2fa abs ( const Vec2fa& a ) {
10098
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
101-
return _mm_and_ps(a.v, mask);
99+
return _mm_and_ps(a.m128, mask);
102100
}
103101
__forceinline Vec2fa sign ( const Vec2fa& a ) {
104-
return blendv_ps(Vec2fa(one).m128(), (-Vec2fa(one)).m128(), _mm_cmplt_ps (a.m128(),Vec2fa(zero).m128()));
102+
return blendv_ps(Vec2fa(one), -Vec2fa(one), _mm_cmplt_ps (a,Vec2fa(zero)));
105103
}
106104

107105
__forceinline Vec2fa rcp ( const Vec2fa& a )
108106
{
109107
#if defined(__aarch64__) || defined(_M_ARM64)
110-
__m128 reciprocal = _mm_rcp_ps(a.v);
111-
reciprocal = vmulq_f32(vrecpsq_f32(a.v, reciprocal), reciprocal);
112-
reciprocal = vmulq_f32(vrecpsq_f32(a.v, reciprocal), reciprocal);
108+
__m128 reciprocal = _mm_rcp_ps(a.m128);
109+
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
110+
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
113111
return (const Vec2fa)reciprocal;
114112
#else
115113
#if defined(__AVX512VL__)
116-
const Vec2fa r = _mm_rcp14_ps(a.v);
114+
const Vec2fa r = _mm_rcp14_ps(a.m128);
117115
#else
118-
const Vec2fa r = _mm_rcp_ps(a.v);
116+
const Vec2fa r = _mm_rcp_ps(a.m128);
119117
#endif
120118

121119
#if defined(__AVX2__)
122-
const Vec2fa h_n = _mm_fnmadd_ps(a.m128(), r.m128(), vfloat4(1.0).m128()); // First, compute 1 - a * r (which will be very close to 0)
123-
const Vec2fa res = _mm_fmadd_ps(r.m128(), h_n.m128(), r.m128()); // Then compute r + r * h_n
120+
const Vec2fa h_n = _mm_fnmadd_ps(a, r, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
121+
const Vec2fa res = _mm_fmadd_ps(r, h_n, r); // Then compute r + r * h_n
124122
#else
125-
const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f).m128(), _mm_mul_ps(a.m128(), r.m128())); // First, compute 1 - a * r (which will be very close to 0)
126-
const Vec2fa res = _mm_add_ps(r.m128(),_mm_mul_ps(r.m128(), h_n.m128())); // Then compute r + r * h_n
123+
const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r)); // First, compute 1 - a * r (which will be very close to 0)
124+
const Vec2fa res = _mm_add_ps(r,_mm_mul_ps(r, h_n)); // Then compute r + r * h_n
127125
#endif
128126

129127
return res;
130128
#endif //defined(__aarch64__) || defined(_M_ARM64)
131129
}
132130

133-
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.v); }
134-
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a.m128(),a.m128()); }
131+
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
132+
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a,a); }
135133

136134
__forceinline Vec2fa rsqrt( const Vec2fa& a )
137135
{
138136
#if defined(__aarch64__) || defined(_M_ARM64)
139-
__m128 r = _mm_rsqrt_ps(a.v);
140-
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.v, r), r));
141-
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.v, r), r));
137+
__m128 r = _mm_rsqrt_ps(a.m128);
138+
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
139+
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
142140
return r;
143141
#else
144142

145143
#if defined(__AVX512VL__)
146-
__m128 r = _mm_rsqrt14_ps(a.v);
144+
__m128 r = _mm_rsqrt14_ps(a.m128);
147145
#else
148-
__m128 r = _mm_rsqrt_ps(a.v);
146+
__m128 r = _mm_rsqrt_ps(a.m128);
149147
#endif
150-
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128(), _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
148+
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
151149

152150
#endif
153151
}
154152

155153
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
156-
return blendv_ps(a.m128(), _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).v, _mm_set1_ps(min_rcp_input)));
154+
return blendv_ps(a, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
157155
}
158156
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
159157
return rcp(zero_fix(a));
@@ -170,33 +168,33 @@ namespace embree
170168
/// Binary Operators
171169
////////////////////////////////////////////////////////////////////////////////
172170

173-
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.v, b.v); }
174-
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.v, b.v); }
175-
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.v, b.v); }
171+
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.m128, b.m128); }
172+
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
173+
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
176174
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
177175
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
178-
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.v,b.v); }
179-
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.v,_mm_set1_ps(b)); }
180-
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.v); }
176+
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.m128,b.m128); }
177+
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
178+
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
181179

182-
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.v,b.v); }
183-
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.v,b.v); }
180+
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
181+
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
184182

185183
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__SSE4_1__)
186184
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
187-
const vint4 ai = _mm_castps_si128(a.m128());
188-
const vint4 bi = _mm_castps_si128(b.m128());
189-
const vint4 ci = _mm_min_epi32(ai.m128i(),bi.m128i());
190-
return _mm_castsi128_ps(ci.m128i());
185+
const vint4 ai = _mm_castps_si128(a);
186+
const vint4 bi = _mm_castps_si128(b);
187+
const vint4 ci = _mm_min_epi32(ai,bi);
188+
return _mm_castsi128_ps(ci);
191189
}
192190
#endif
193191

194192
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__SSE4_1__)
195193
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
196-
const vint4 ai = _mm_castps_si128(a.m128());
197-
const vint4 bi = _mm_castps_si128(b.m128());
198-
const vint4 ci = _mm_max_epi32(ai.m128i(),bi.m128i());
199-
return _mm_castsi128_ps(ci.m128i());
194+
const vint4 ai = _mm_castps_si128(a);
195+
const vint4 bi = _mm_castps_si128(b);
196+
const vint4 ci = _mm_max_epi32(ai,bi);
197+
return _mm_castsi128_ps(ci);
200198
}
201199
#endif
202200

@@ -209,10 +207,10 @@ namespace embree
209207
////////////////////////////////////////////////////////////////////////////////
210208

211209
#if defined(__AVX2__)
212-
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a.m128(),b.m128(),c.m128()); }
213-
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a.m128(),b.m128(),c.m128()); }
214-
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a.m128(),b.m128(),c.m128()); }
215-
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a.m128(),b.m128(),c.m128()); }
210+
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
211+
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
212+
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
213+
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
216214
#else
217215
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
218216
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
@@ -249,16 +247,16 @@ namespace embree
249247
/// Comparison Operators
250248
////////////////////////////////////////////////////////////////////////////////
251249

252-
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.v, b.v)) & 3) == 3; }
253-
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.v, b.v)) & 3) != 0; }
250+
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 3) == 3; }
251+
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
254252

255253
////////////////////////////////////////////////////////////////////////////////
256254
/// Euclidean Space Operators
257255
////////////////////////////////////////////////////////////////////////////////
258256

259257
#if defined(__SSE4_1__)
260258
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
261-
return _mm_cvtss_f32(_mm_dp_ps(a.m128(),b.m128(),0x3F));
259+
return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
262260
}
263261
#else
264262
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
@@ -283,7 +281,7 @@ namespace embree
283281

284282
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
285283
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
286-
return blendv_ps(f.m128(), t.m128(), mask);
284+
return blendv_ps(f, t, mask);
287285
}
288286

289287
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
@@ -303,12 +301,12 @@ namespace embree
303301

304302
#if defined(__aarch64__) || defined(_M_ARM64)
305303
//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
306-
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a.v); }
307-
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a.v); }
304+
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
305+
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
308306
#elif defined (__SSE4_1__)
309307
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
310-
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a.m128(), _MM_FROUND_TO_NEG_INF ); }
311-
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a.m128(), _MM_FROUND_TO_POS_INF ); }
308+
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
309+
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
312310
#else
313311
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
314312
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }

common/math/vec3.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ namespace embree
295295
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(_M_ARM64)
296296
template<>
297297
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
298-
const vfloat4 v = vfloat4(a.v); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
298+
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
299299
}
300300
#endif
301301

0 commit comments

Comments
 (0)