Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 110a0d1b authored by Lucas Dupin's avatar Lucas Dupin
Browse files

Gaussian blur optimizations

Test: transaction tests
Test: visual
Bug: 149309004
Change-Id: I5ddcd674e7068fa05482b3f6f2eb1efb1c4a4b73
parent f22c0ebb
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -132,8 +132,7 @@ status_t BlurFilter::render(bool multiPass) {
}

string BlurFilter::getVertexShader() const {
    return R"SHADER(
        #version 310 es
    return R"SHADER(#version 310 es

        in vec2 aPosition;
        in highp vec2 aUV;
@@ -147,8 +146,7 @@ string BlurFilter::getVertexShader() const {
}

string BlurFilter::getMixFragShader() const {
    string shader = R"SHADER(
        #version 310 es
    string shader = R"SHADER(#version 310 es
        precision mediump float;

        in highp vec2 vUV;
+87 −39
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ GaussianBlurFilter::GaussianBlurFilter(GLESRenderEngine& engine)
    mVPosLoc = mVerticalProgram.getAttributeLocation("aPosition");
    mVUvLoc = mVerticalProgram.getAttributeLocation("aUV");
    mVTextureLoc = mVerticalProgram.getUniformLocation("uTexture");
    mVIncrementLoc = mVerticalProgram.getUniformLocation("uIncrement");
    mVGaussianOffsetLoc = mVerticalProgram.getUniformLocation("uGaussianOffsets");
    mVNumSamplesLoc = mVerticalProgram.getUniformLocation("uSamples");
    mVGaussianWeightLoc = mVerticalProgram.getUniformLocation("uGaussianWeights");

@@ -51,7 +51,7 @@ GaussianBlurFilter::GaussianBlurFilter(GLESRenderEngine& engine)
    mHPosLoc = mHorizontalProgram.getAttributeLocation("aPosition");
    mHUvLoc = mHorizontalProgram.getAttributeLocation("aUV");
    mHTextureLoc = mHorizontalProgram.getUniformLocation("uTexture");
    mHIncrementLoc = mHorizontalProgram.getUniformLocation("uIncrement");
    mHGaussianOffsetLoc = mHorizontalProgram.getUniformLocation("uGaussianOffsets");
    mHNumSamplesLoc = mHorizontalProgram.getUniformLocation("uSamples");
    mHGaussianWeightLoc = mHorizontalProgram.getUniformLocation("uGaussianWeights");
}
@@ -60,6 +60,36 @@ void GaussianBlurFilter::allocateTextures() {
    mVerticalPassFbo.allocateBuffers(mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight());
}

static void calculateLinearGaussian(uint32_t samples, double dimension,
                                    GLfloat* gaussianLinearOffsets, GLfloat* gaussianWeights,
                                    GLfloat* gaussianLinearWeights) {
    // The central point in the symmetric bell curve is not offset.
    // This decision allows one less sampling in the GPU.
    gaussianLinearWeights[0] = gaussianWeights[0];
    gaussianLinearOffsets[0] = 0.0;

    // Calculate the linear weights.
    // This is a vector reduction where an element of the packed reduced array
    // contains the sum of two adjacent members of the original packed array.
    // We start preserving the element 1 of the array and then perform sum for
    // every other (i+=2) element of the gaussianWeights array.
    gaussianLinearWeights[1] = gaussianWeights[1];
    const auto start = 1 + ((samples - 1) & 0x1);
    for (size_t i = start; i < samples; i += 2) {
        gaussianLinearWeights[start + i / 2] = gaussianWeights[i] + gaussianWeights[i + 1];
    }

    // Calculate the texture coordinates offsets as an average of the initial offsets,
    // weighted by the Gaussian weights as described in the original article.
    gaussianLinearOffsets[1] = 1.0 / dimension;
    for (size_t i = start; i < samples; i += 2) {
        GLfloat offset_1 = float(i) / dimension;
        GLfloat offset_2 = float(i + 1) / dimension;
        gaussianLinearOffsets[start + i / 2] =
                (offset_1 * gaussianWeights[i] + offset_2 * gaussianWeights[i + 1]) /
                gaussianLinearWeights[start + i / 2];
    }
}
status_t GaussianBlurFilter::prepare() {
    ATRACE_NAME("GaussianBlurFilter::prepare");

@@ -88,27 +118,47 @@ status_t GaussianBlurFilter::prepare() {
    mVerticalPassFbo.bind();
    mVerticalProgram.useProgram();

    // Precompute gaussian bell curve, and send it to the shader to avoid
    // unnecessary computations.
    auto samples = min(mRadius, kNumSamples);
    // Precompute gaussian bell curve, and send it to the shader to avoid unnecessary computations.
    double radiusD = fmax(1.0, mRadius * kFboScale);
    auto samples = int(fmin(radiusD, kNumSamples));
    GLfloat gaussianWeights[kNumSamples] = {};
    for (size_t i = 0; i < samples; i++) {
        float normalized = float(i) / samples;

    gaussianWeights[0] = 1.0f;
    auto totalWeight = gaussianWeights[0];

    // Gaussian weights calculation.
    for (size_t i = 1; i < samples; i++) {
        const double normalized = i / radiusD;
        gaussianWeights[i] = (float)exp(-K * normalized * normalized);
        totalWeight += 2.0 * gaussianWeights[i];
    }

    // Gaussian weights normalization to avoid work in the GPU.
    for (size_t i = 0; i < samples; i++) {
        gaussianWeights[i] /= totalWeight;
    }

    // set uniforms
    auto width = mVerticalPassFbo.getBufferWidth();
    auto height = mVerticalPassFbo.getBufferHeight();
    auto radiusF = fmax(1.0f, mRadius * kFboScale);
    glViewport(0, 0, width, height);

    // Allocate space for the corrected Gaussian weights and offsets.
    // We could use less space, but let's keep the code simple.
    GLfloat gaussianLinearWeights[kNumSamples] = {};
    GLfloat gaussianLinearOffsets[kNumSamples] = {};

    // Calculate the weights and offsets for the vertical pass.
    // This only need to be called every time mRadius or height changes, so it could be optimized.
    calculateLinearGaussian(samples, double(height), gaussianLinearOffsets, gaussianWeights,
                            gaussianLinearWeights);
    // set uniforms
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, mBlurredFbo.getTextureName());
    glUniform1i(mVTextureLoc, 0);
    glUniform2f(mVIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
    glUniform1i(mVNumSamplesLoc, samples);
    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianWeights);
    mEngine.checkErrors("Setting vertical-diagonal pass uniforms");
    glUniform1i(mVNumSamplesLoc, 1 + (samples + 1) / 2);
    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
    glUniform1fv(mVGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
    mEngine.checkErrors("Setting vertical pass uniforms");

    drawMesh(mVUvLoc, mVPosLoc);

@@ -116,14 +166,18 @@ status_t GaussianBlurFilter::prepare() {
    mBlurredFbo.bind();
    mHorizontalProgram.useProgram();

    // Calculate the weights and offsets for the horizontal pass.
    // This only needs to be called every time mRadius or width change, so it could be optimized.
    calculateLinearGaussian(samples, double(width), gaussianLinearOffsets, gaussianWeights,
                            gaussianLinearWeights);
    // set uniforms
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, mVerticalPassFbo.getTextureName());
    glUniform1i(mHTextureLoc, 0);
    glUniform2f(mHIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
    glUniform1i(mHNumSamplesLoc, samples);
    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianWeights);
    mEngine.checkErrors("Setting vertical pass uniforms");
    glUniform1i(mHNumSamplesLoc, 1 + (samples + 1) / 2);
    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
    glUniform1fv(mHGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
    mEngine.checkErrors("Setting horizontal pass uniforms");

    drawMesh(mHUvLoc, mHPosLoc);

@@ -142,43 +196,37 @@ string GaussianBlurFilter::getFragmentShader(bool horizontal) const {
    stringstream shader;
    shader << "#version 310 es\n"
           << "#define DIRECTION " << (horizontal ? "1" : "0") << "\n"
           << "#define NUM_SAMPLES " << kNumSamples <<
           << "#define NUM_SAMPLES " << 1 + (kNumSamples + 1) / 2 <<
            R"SHADER(
        precision mediump float;

        uniform sampler2D uTexture;
        uniform vec2 uIncrement;
        uniform float[NUM_SAMPLES] uGaussianWeights;
        uniform float[NUM_SAMPLES] uGaussianOffsets;
        uniform int uSamples;

        highp in vec2 vUV;
        out vec4 fragColor;

        vec3 gaussianBlur(sampler2D texture, highp vec2 uv, float inc, vec2 direction) {
            float totalWeight = 0.0;
            vec3 blurred = vec3(0.0);
            float fSamples = 1.0 / float(uSamples);

            for (int i = -uSamples; i <= uSamples; i++) {
                float weight = uGaussianWeights[abs(i)];
                float normalized = float(i) * fSamples;
                float radInc = inc * normalized;
                blurred += weight * (texture(texture, radInc * direction + uv, 0.0)).rgb;
                totalWeight += weight;
            }

            return blurred / totalWeight;
        }

        void main() {
            #if DIRECTION == 1
            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.x, vec2(1.0, 0.0));
            const vec2 direction = vec2(1.0, 0.0);
            #else
            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.y, vec2(0.0, 1.0));
            const vec2 direction = vec2(0.0, 1.0);
            #endif
            fragColor = vec4(color, 1.0);

            // Iteration zero outside loop to avoid sampling the central point twice.
            vec4 blurred = uGaussianWeights[0] * (texture(uTexture, vUV, 0.0));

            // Iterate one side of the bell to halve the loop iterations.
            for (int i = 1; i <= uSamples; i++) {
                vec2 offset = uGaussianOffsets[i] * direction;
                blurred += uGaussianWeights[i] * (texture(uTexture, vUV + offset, 0.0));
                blurred += uGaussianWeights[i] * (texture(uTexture, vUV - offset, 0.0));
            }

            fragColor = vec4(blurred.rgb, 1.0);
        }
    )SHADER";
    return shader.str();
}
+6 −3
Original line number Diff line number Diff line
@@ -28,9 +28,12 @@ namespace android {
namespace renderengine {
namespace gl {

// Class that implements a Gaussian Filter that uses Linear Sampling
// to halve the number of samples and reduce runtime by 40% as described in:
// http://rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling
class GaussianBlurFilter : public BlurFilter {
public:
    static constexpr uint32_t kNumSamples = 12;
    static constexpr uint32_t kNumSamples = 22;

    explicit GaussianBlurFilter(GLESRenderEngine& engine);
    status_t prepare() override;
@@ -47,7 +50,7 @@ private:
    GLuint mVPosLoc;
    GLuint mVUvLoc;
    GLuint mVTextureLoc;
    GLuint mVIncrementLoc;
    GLuint mVGaussianOffsetLoc;
    GLuint mVNumSamplesLoc;
    GLuint mVGaussianWeightLoc;

@@ -56,7 +59,7 @@ private:
    GLuint mHPosLoc;
    GLuint mHUvLoc;
    GLuint mHTextureLoc;
    GLuint mHIncrementLoc;
    GLuint mHGaussianOffsetLoc;
    GLuint mHNumSamplesLoc;
    GLuint mHGaussianWeightLoc;
};