Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a8f091ec authored by Lucas Dupin's avatar Lucas Dupin Committed by Android (Google) Code Review
Browse files

Merge "Gaussian blur optimizations"

parents 0961d568 110a0d1b
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -132,8 +132,7 @@ status_t BlurFilter::render(bool multiPass) {
}

string BlurFilter::getVertexShader() const {
    return R"SHADER(
        #version 310 es
    return R"SHADER(#version 310 es

        in vec2 aPosition;
        in highp vec2 aUV;
@@ -147,8 +146,7 @@ string BlurFilter::getVertexShader() const {
}

string BlurFilter::getMixFragShader() const {
    string shader = R"SHADER(
        #version 310 es
    string shader = R"SHADER(#version 310 es
        precision mediump float;

        in highp vec2 vUV;
+87 −39
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ GaussianBlurFilter::GaussianBlurFilter(GLESRenderEngine& engine)
    mVPosLoc = mVerticalProgram.getAttributeLocation("aPosition");
    mVUvLoc = mVerticalProgram.getAttributeLocation("aUV");
    mVTextureLoc = mVerticalProgram.getUniformLocation("uTexture");
    mVIncrementLoc = mVerticalProgram.getUniformLocation("uIncrement");
    mVGaussianOffsetLoc = mVerticalProgram.getUniformLocation("uGaussianOffsets");
    mVNumSamplesLoc = mVerticalProgram.getUniformLocation("uSamples");
    mVGaussianWeightLoc = mVerticalProgram.getUniformLocation("uGaussianWeights");

@@ -51,7 +51,7 @@ GaussianBlurFilter::GaussianBlurFilter(GLESRenderEngine& engine)
    mHPosLoc = mHorizontalProgram.getAttributeLocation("aPosition");
    mHUvLoc = mHorizontalProgram.getAttributeLocation("aUV");
    mHTextureLoc = mHorizontalProgram.getUniformLocation("uTexture");
    mHIncrementLoc = mHorizontalProgram.getUniformLocation("uIncrement");
    mHGaussianOffsetLoc = mHorizontalProgram.getUniformLocation("uGaussianOffsets");
    mHNumSamplesLoc = mHorizontalProgram.getUniformLocation("uSamples");
    mHGaussianWeightLoc = mHorizontalProgram.getUniformLocation("uGaussianWeights");
}
@@ -60,6 +60,36 @@ void GaussianBlurFilter::allocateTextures() {
    mVerticalPassFbo.allocateBuffers(mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight());
}

static void calculateLinearGaussian(uint32_t samples, double dimension,
                                    GLfloat* gaussianLinearOffsets, GLfloat* gaussianWeights,
                                    GLfloat* gaussianLinearWeights) {
    // The central point in the symmetric bell curve is not offset.
    // This decision allows one less sampling in the GPU.
    gaussianLinearWeights[0] = gaussianWeights[0];
    gaussianLinearOffsets[0] = 0.0;

    // Calculate the linear weights.
    // This is a vector reduction where an element of the packed reduced array
    // contains the sum of two adjacent members of the original packed array.
    // We start preserving the element 1 of the array and then perform sum for
    // every other (i+=2) element of the gaussianWeights array.
    gaussianLinearWeights[1] = gaussianWeights[1];
    const auto start = 1 + ((samples - 1) & 0x1);
    for (size_t i = start; i < samples; i += 2) {
        gaussianLinearWeights[start + i / 2] = gaussianWeights[i] + gaussianWeights[i + 1];
    }

    // Calculate the texture coordinates offsets as an average of the initial offsets,
    // weighted by the Gaussian weights as described in the original article.
    gaussianLinearOffsets[1] = 1.0 / dimension;
    for (size_t i = start; i < samples; i += 2) {
        GLfloat offset_1 = float(i) / dimension;
        GLfloat offset_2 = float(i + 1) / dimension;
        gaussianLinearOffsets[start + i / 2] =
                (offset_1 * gaussianWeights[i] + offset_2 * gaussianWeights[i + 1]) /
                gaussianLinearWeights[start + i / 2];
    }
}
status_t GaussianBlurFilter::prepare() {
    ATRACE_NAME("GaussianBlurFilter::prepare");

@@ -88,27 +118,47 @@ status_t GaussianBlurFilter::prepare() {
    mVerticalPassFbo.bind();
    mVerticalProgram.useProgram();

    // Precompute gaussian bell curve, and send it to the shader to avoid
    // unnecessary computations.
    auto samples = min(mRadius, kNumSamples);
    // Precompute gaussian bell curve, and send it to the shader to avoid unnecessary computations.
    double radiusD = fmax(1.0, mRadius * kFboScale);
    auto samples = int(fmin(radiusD, kNumSamples));
    GLfloat gaussianWeights[kNumSamples] = {};
    for (size_t i = 0; i < samples; i++) {
        float normalized = float(i) / samples;

    gaussianWeights[0] = 1.0f;
    auto totalWeight = gaussianWeights[0];

    // Gaussian weights calculation.
    for (size_t i = 1; i < samples; i++) {
        const double normalized = i / radiusD;
        gaussianWeights[i] = (float)exp(-K * normalized * normalized);
        totalWeight += 2.0 * gaussianWeights[i];
    }

    // Gaussian weights normalization to avoid work in the GPU.
    for (size_t i = 0; i < samples; i++) {
        gaussianWeights[i] /= totalWeight;
    }

    // set uniforms
    auto width = mVerticalPassFbo.getBufferWidth();
    auto height = mVerticalPassFbo.getBufferHeight();
    auto radiusF = fmax(1.0f, mRadius * kFboScale);
    glViewport(0, 0, width, height);

    // Allocate space for the corrected Gaussian weights and offsets.
    // We could use less space, but let's keep the code simple.
    GLfloat gaussianLinearWeights[kNumSamples] = {};
    GLfloat gaussianLinearOffsets[kNumSamples] = {};

    // Calculate the weights and offsets for the vertical pass.
    // This only need to be called every time mRadius or height changes, so it could be optimized.
    calculateLinearGaussian(samples, double(height), gaussianLinearOffsets, gaussianWeights,
                            gaussianLinearWeights);
    // set uniforms
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, mBlurredFbo.getTextureName());
    glUniform1i(mVTextureLoc, 0);
    glUniform2f(mVIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
    glUniform1i(mVNumSamplesLoc, samples);
    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianWeights);
    mEngine.checkErrors("Setting vertical-diagonal pass uniforms");
    glUniform1i(mVNumSamplesLoc, 1 + (samples + 1) / 2);
    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
    glUniform1fv(mVGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
    mEngine.checkErrors("Setting vertical pass uniforms");

    drawMesh(mVUvLoc, mVPosLoc);

@@ -116,14 +166,18 @@ status_t GaussianBlurFilter::prepare() {
    mBlurredFbo.bind();
    mHorizontalProgram.useProgram();

    // Calculate the weights and offsets for the horizontal pass.
    // This only needs to be called every time mRadius or width change, so it could be optimized.
    calculateLinearGaussian(samples, double(width), gaussianLinearOffsets, gaussianWeights,
                            gaussianLinearWeights);
    // set uniforms
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, mVerticalPassFbo.getTextureName());
    glUniform1i(mHTextureLoc, 0);
    glUniform2f(mHIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
    glUniform1i(mHNumSamplesLoc, samples);
    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianWeights);
    mEngine.checkErrors("Setting vertical pass uniforms");
    glUniform1i(mHNumSamplesLoc, 1 + (samples + 1) / 2);
    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
    glUniform1fv(mHGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
    mEngine.checkErrors("Setting horizontal pass uniforms");

    drawMesh(mHUvLoc, mHPosLoc);

@@ -142,43 +196,37 @@ string GaussianBlurFilter::getFragmentShader(bool horizontal) const {
    stringstream shader;
    shader << "#version 310 es\n"
           << "#define DIRECTION " << (horizontal ? "1" : "0") << "\n"
           << "#define NUM_SAMPLES " << kNumSamples <<
           << "#define NUM_SAMPLES " << 1 + (kNumSamples + 1) / 2 <<
            R"SHADER(
        precision mediump float;

        uniform sampler2D uTexture;
        uniform vec2 uIncrement;
        uniform float[NUM_SAMPLES] uGaussianWeights;
        uniform float[NUM_SAMPLES] uGaussianOffsets;
        uniform int uSamples;

        highp in vec2 vUV;
        out vec4 fragColor;

        vec3 gaussianBlur(sampler2D texture, highp vec2 uv, float inc, vec2 direction) {
            float totalWeight = 0.0;
            vec3 blurred = vec3(0.0);
            float fSamples = 1.0 / float(uSamples);

            for (int i = -uSamples; i <= uSamples; i++) {
                float weight = uGaussianWeights[abs(i)];
                float normalized = float(i) * fSamples;
                float radInc = inc * normalized;
                blurred += weight * (texture(texture, radInc * direction + uv, 0.0)).rgb;
                totalWeight += weight;
            }

            return blurred / totalWeight;
        }

        void main() {
            #if DIRECTION == 1
            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.x, vec2(1.0, 0.0));
            const vec2 direction = vec2(1.0, 0.0);
            #else
            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.y, vec2(0.0, 1.0));
            const vec2 direction = vec2(0.0, 1.0);
            #endif
            fragColor = vec4(color, 1.0);

            // Iteration zero outside loop to avoid sampling the central point twice.
            vec4 blurred = uGaussianWeights[0] * (texture(uTexture, vUV, 0.0));

            // Iterate one side of the bell to halve the loop iterations.
            for (int i = 1; i <= uSamples; i++) {
                vec2 offset = uGaussianOffsets[i] * direction;
                blurred += uGaussianWeights[i] * (texture(uTexture, vUV + offset, 0.0));
                blurred += uGaussianWeights[i] * (texture(uTexture, vUV - offset, 0.0));
            }

            fragColor = vec4(blurred.rgb, 1.0);
        }
    )SHADER";
    return shader.str();
}
+6 −3
Original line number Diff line number Diff line
@@ -28,9 +28,12 @@ namespace android {
namespace renderengine {
namespace gl {

// Class that implements a Gaussian Filter that uses Linear Sampling
// to halve the number of samples and reduce runtime by 40% as described in:
// http://rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling
class GaussianBlurFilter : public BlurFilter {
public:
    static constexpr uint32_t kNumSamples = 12;
    static constexpr uint32_t kNumSamples = 22;

    explicit GaussianBlurFilter(GLESRenderEngine& engine);
    status_t prepare() override;
@@ -47,7 +50,7 @@ private:
    GLuint mVPosLoc;
    GLuint mVUvLoc;
    GLuint mVTextureLoc;
    GLuint mVIncrementLoc;
    GLuint mVGaussianOffsetLoc;
    GLuint mVNumSamplesLoc;
    GLuint mVGaussianWeightLoc;

@@ -56,7 +59,7 @@ private:
    GLuint mHPosLoc;
    GLuint mHUvLoc;
    GLuint mHTextureLoc;
    GLuint mHIncrementLoc;
    GLuint mHGaussianOffsetLoc;
    GLuint mHNumSamplesLoc;
    GLuint mHGaussianWeightLoc;
};