Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)

Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
master
paradust7 2022-06-11 11:01:30 -07:00 committed by GitHub
parent e7d4ec6834
commit 7ffc0268df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 28 additions and 61 deletions

View File

@ -38,15 +38,6 @@
// Unsigned magic seed prevents undefined behavior. // Unsigned magic seed prevents undefined behavior.
#define NOISE_MAGIC_SEED 1013U #define NOISE_MAGIC_SEED 1013U
typedef float (*Interp2dFxn)(
float v00, float v10, float v01, float v11,
float x, float y);
typedef float (*Interp3dFxn)(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z);
FlagDesc flagdesc_noiseparams[] = { FlagDesc flagdesc_noiseparams[] = {
{"defaults", NOISE_FLAG_DEFAULTS}, {"defaults", NOISE_FLAG_DEFAULTS},
{"eased", NOISE_FLAG_EASED}, {"eased", NOISE_FLAG_EASED},
@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t)
inline float biLinearInterpolation( inline float biLinearInterpolation(
float v00, float v10, float v00, float v10,
float v01, float v11, float v01, float v11,
float x, float y) float x, float y,
{ bool eased)
float tx = easeCurve(x);
float ty = easeCurve(y);
float u = linearInterpolation(v00, v10, tx);
float v = linearInterpolation(v01, v11, tx);
return linearInterpolation(u, v, ty);
}
inline float biLinearInterpolationNoEase(
float v00, float v10,
float v01, float v11,
float x, float y)
{ {
// Inlining will optimize this branch out when possible
if (eased) {
x = easeCurve(x);
y = easeCurve(y);
}
float u = linearInterpolation(v00, v10, x); float u = linearInterpolation(v00, v10, x);
float v = linearInterpolation(v01, v11, x); float v = linearInterpolation(v01, v11, x);
return linearInterpolation(u, v, y); return linearInterpolation(u, v, y);
} }
float triLinearInterpolation( inline float triLinearInterpolation(
float v000, float v100, float v010, float v110, float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111, float v001, float v101, float v011, float v111,
float x, float y, float z) float x, float y, float z,
bool eased)
{ {
float tx = easeCurve(x); // Inlining will optimize this branch out when possible
float ty = easeCurve(y); if (eased) {
float tz = easeCurve(z); x = easeCurve(x);
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty); y = easeCurve(y);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty); z = easeCurve(z);
return linearInterpolation(u, v, tz); }
} float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
float triLinearInterpolationNoEase(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z)
{
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
return linearInterpolation(u, v, z); return linearInterpolation(u, v, z);
} }
@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
float v01 = noise2d(x0, y0+1, seed); float v01 = noise2d(x0, y0+1, seed);
float v11 = noise2d(x0+1, y0+1, seed); float v11 = noise2d(x0+1, y0+1, seed);
// Interpolate // Interpolate
if (eased) return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
} }
@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed); float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed); float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
// Interpolate // Interpolate
if (eased) { return triLinearInterpolation(
return triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
xl, yl, zl);
}
return triLinearInterpolationNoEase(
v000, v100, v010, v110, v000, v100, v010, v110,
v001, v101, v011, v111, v001, v101, v011, v111,
xl, yl, zl); xl, yl, zl,
eased);
} }
@ -518,9 +487,6 @@ void Noise::gradientMap2D(
s32 x0, y0; s32 x0, y0;
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED); bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
Interp2dFxn interpolate = eased ?
biLinearInterpolation : biLinearInterpolationNoEase;
x0 = std::floor(x); x0 = std::floor(x);
y0 = std::floor(y); y0 = std::floor(y);
u = x - (float)x0; u = x - (float)x0;
@ -547,7 +513,8 @@ void Noise::gradientMap2D(
u = orig_u; u = orig_u;
noisex = 0; noisex = 0;
for (i = 0; i != sx; i++) { for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v); gradient_buf[index++] =
biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
u += step_x; u += step_x;
if (u >= 1.0) { if (u >= 1.0) {
@ -583,8 +550,7 @@ void Noise::gradientMap3D(
u32 nlx, nly, nlz; u32 nlx, nly, nlz;
s32 x0, y0, z0; s32 x0, y0, z0;
Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ? bool eased = np.flags & NOISE_FLAG_EASED;
triLinearInterpolation : triLinearInterpolationNoEase;
x0 = std::floor(x); x0 = std::floor(x);
y0 = std::floor(y); y0 = std::floor(y);
@ -625,10 +591,11 @@ void Noise::gradientMap3D(
u = orig_u; u = orig_u;
noisex = 0; noisex = 0;
for (i = 0; i != sx; i++) { for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate( gradient_buf[index++] = triLinearInterpolation(
v000, v100, v010, v110, v000, v100, v010, v110,
v001, v101, v011, v111, v001, v101, v011, v111,
u, v, w); u, v, w,
eased);
u += step_x; u += step_x;
if (u >= 1.0) { if (u >= 1.0) {