Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)

Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
master
paradust7 2022-06-11 11:01:30 -07:00 committed by GitHub
parent e7d4ec6834
commit 7ffc0268df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 28 additions and 61 deletions

View File

@ -38,15 +38,6 @@
// Unsigned magic seed prevents undefined behavior.
#define NOISE_MAGIC_SEED 1013U
typedef float (*Interp2dFxn)(
float v00, float v10, float v01, float v11,
float x, float y);
typedef float (*Interp3dFxn)(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z);
FlagDesc flagdesc_noiseparams[] = {
{"defaults", NOISE_FLAG_DEFAULTS},
{"eased", NOISE_FLAG_EASED},
@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t)
inline float biLinearInterpolation(
float v00, float v10,
float v01, float v11,
float x, float y)
{
float tx = easeCurve(x);
float ty = easeCurve(y);
float u = linearInterpolation(v00, v10, tx);
float v = linearInterpolation(v01, v11, tx);
return linearInterpolation(u, v, ty);
}
inline float biLinearInterpolationNoEase(
float v00, float v10,
float v01, float v11,
float x, float y)
float x, float y,
bool eased)
{
// Inlining will optimize this branch out when possible
if (eased) {
x = easeCurve(x);
y = easeCurve(y);
}
float u = linearInterpolation(v00, v10, x);
float v = linearInterpolation(v01, v11, x);
return linearInterpolation(u, v, y);
}
float triLinearInterpolation(
inline float triLinearInterpolation(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z)
float x, float y, float z,
bool eased)
{
float tx = easeCurve(x);
float ty = easeCurve(y);
float tz = easeCurve(z);
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty);
return linearInterpolation(u, v, tz);
}
float triLinearInterpolationNoEase(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z)
{
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
// Inlining will optimize this branch out when possible
if (eased) {
x = easeCurve(x);
y = easeCurve(y);
z = easeCurve(z);
}
float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
return linearInterpolation(u, v, z);
}
@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
float v01 = noise2d(x0, y0+1, seed);
float v11 = noise2d(x0+1, y0+1, seed);
// Interpolate
if (eased)
return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
}
@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
// Interpolate
if (eased) {
return triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
xl, yl, zl);
}
return triLinearInterpolationNoEase(
return triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
xl, yl, zl);
xl, yl, zl,
eased);
}
@ -518,9 +487,6 @@ void Noise::gradientMap2D(
s32 x0, y0;
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
Interp2dFxn interpolate = eased ?
biLinearInterpolation : biLinearInterpolationNoEase;
x0 = std::floor(x);
y0 = std::floor(y);
u = x - (float)x0;
@ -547,7 +513,8 @@ void Noise::gradientMap2D(
u = orig_u;
noisex = 0;
for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v);
gradient_buf[index++] =
biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
u += step_x;
if (u >= 1.0) {
@ -583,8 +550,7 @@ void Noise::gradientMap3D(
u32 nlx, nly, nlz;
s32 x0, y0, z0;
Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ?
triLinearInterpolation : triLinearInterpolationNoEase;
bool eased = np.flags & NOISE_FLAG_EASED;
x0 = std::floor(x);
y0 = std::floor(y);
@ -625,10 +591,11 @@ void Noise::gradientMap3D(
u = orig_u;
noisex = 0;
for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate(
gradient_buf[index++] = triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
u, v, w);
u, v, w,
eased);
u += step_x;
if (u >= 1.0) {