Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f44c6b8c authored by Paul Thomson's avatar Paul Thomson
Browse files

Add GPU work metrics processing

Add gpu_work eBPF program that defines the format of and processes the
power/gpu_work_period tracepoint.

Add code to gpuservice that attaches the eBPF program and reports the
GPU time in frequency state stats via `dumpsys gpu --gpuwork`.

Bug: b/213577594
Change-Id: I2774909e8a375a8b20e6fb55f8069ecdbe650ef2
parent 573b51eb
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -31,6 +31,7 @@ cc_defaults {
        "libcutils",
        "libcutils",
        "libgfxstats",
        "libgfxstats",
        "libgpumem",
        "libgpumem",
        "libgpuwork",
        "libgpumemtracer",
        "libgpumemtracer",
        "libgraphicsenv",
        "libgraphicsenv",
        "liblog",
        "liblog",
+18 −3
Original line number Original line Diff line number Diff line
@@ -25,6 +25,7 @@
#include <binder/PermissionCache.h>
#include <binder/PermissionCache.h>
#include <cutils/properties.h>
#include <cutils/properties.h>
#include <gpumem/GpuMem.h>
#include <gpumem/GpuMem.h>
#include <gpuwork/GpuWork.h>
#include <gpustats/GpuStats.h>
#include <gpustats/GpuStats.h>
#include <private/android_filesystem_config.h>
#include <private/android_filesystem_config.h>
#include <tracing/GpuMemTracer.h>
#include <tracing/GpuMemTracer.h>
@@ -50,13 +51,20 @@ const char* const GpuService::SERVICE_NAME = "gpu";


GpuService::GpuService()
GpuService::GpuService()
      : mGpuMem(std::make_shared<GpuMem>()),
      : mGpuMem(std::make_shared<GpuMem>()),
        mGpuWork(std::make_shared<gpuwork::GpuWork>()),
        mGpuStats(std::make_unique<GpuStats>()),
        mGpuStats(std::make_unique<GpuStats>()),
        mGpuMemTracer(std::make_unique<GpuMemTracer>()) {
        mGpuMemTracer(std::make_unique<GpuMemTracer>()) {
    std::thread asyncInitThread([this]() {

    std::thread gpuMemAsyncInitThread([this]() {
        mGpuMem->initialize();
        mGpuMem->initialize();
        mGpuMemTracer->initialize(mGpuMem);
        mGpuMemTracer->initialize(mGpuMem);
    });
    });
    asyncInitThread.detach();
    gpuMemAsyncInitThread.detach();

    std::thread gpuWorkAsyncInitThread([this]() {
        mGpuWork->initialize();
    });
    gpuWorkAsyncInitThread.detach();
};
};


void GpuService::setGpuStats(const std::string& driverPackageName,
void GpuService::setGpuStats(const std::string& driverPackageName,
@@ -124,6 +132,7 @@ status_t GpuService::doDump(int fd, const Vector<String16>& args, bool /*asProto
        bool dumpDriverInfo = false;
        bool dumpDriverInfo = false;
        bool dumpMem = false;
        bool dumpMem = false;
        bool dumpStats = false;
        bool dumpStats = false;
        bool dumpWork = false;
        size_t numArgs = args.size();
        size_t numArgs = args.size();


        if (numArgs) {
        if (numArgs) {
@@ -134,9 +143,11 @@ status_t GpuService::doDump(int fd, const Vector<String16>& args, bool /*asProto
                    dumpDriverInfo = true;
                    dumpDriverInfo = true;
                } else if (args[index] == String16("--gpumem")) {
                } else if (args[index] == String16("--gpumem")) {
                    dumpMem = true;
                    dumpMem = true;
                } else if (args[index] == String16("--gpuwork")) {
                    dumpWork = true;
                }
                }
            }
            }
            dumpAll = !(dumpDriverInfo || dumpMem || dumpStats);
            dumpAll = !(dumpDriverInfo || dumpMem || dumpStats || dumpWork);
        }
        }


        if (dumpAll || dumpDriverInfo) {
        if (dumpAll || dumpDriverInfo) {
@@ -151,6 +162,10 @@ status_t GpuService::doDump(int fd, const Vector<String16>& args, bool /*asProto
            mGpuStats->dump(args, &result);
            mGpuStats->dump(args, &result);
            result.append("\n");
            result.append("\n");
        }
        }
         if (dumpAll || dumpWork) {
            mGpuWork->dump(args, &result);
            result.append("\n");
        }
    }
    }


    write(fd, result.c_str(), result.size());
    write(fd, result.c_str(), result.size());
+5 −0
Original line number Original line Diff line number Diff line
@@ -28,6 +28,10 @@


namespace android {
namespace android {


namespace gpuwork {
class GpuWork;
}

class GpuMem;
class GpuMem;
class GpuStats;
class GpuStats;
class GpuMemTracer;
class GpuMemTracer;
@@ -77,6 +81,7 @@ private:
     * Attributes
     * Attributes
     */
     */
    std::shared_ptr<GpuMem> mGpuMem;
    std::shared_ptr<GpuMem> mGpuMem;
    std::shared_ptr<gpuwork::GpuWork> mGpuWork;
    std::unique_ptr<GpuStats> mGpuStats;
    std::unique_ptr<GpuStats> mGpuStats;
    std::unique_ptr<GpuMemTracer> mGpuMemTracer;
    std::unique_ptr<GpuMemTracer> mGpuMemTracer;
    std::mutex mLock;
    std::mutex mLock;
+57 −0
Original line number Original line Diff line number Diff line
// Copyright 2022 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package {
    default_applicable_licenses: ["frameworks_native_license"],
}

cc_library_shared {
    name: "libgpuwork",
    srcs: [
        "GpuWork.cpp",
    ],
    header_libs: [
        "gpu_work_structs",
    ],
    shared_libs: [
        "libbase",
        "libbpf_bcc",
        "libbpf_android",
        "libcutils",
        "liblog",
        "libutils",
    ],
    export_include_dirs: [
        "include",
    ],
    export_header_lib_headers: [
        "gpu_work_structs",
    ],
    export_shared_lib_headers: [
        "libbase",
        "libbpf_android",
    ],
    cppflags: [
        "-Wall",
        "-Werror",
        "-Wformat",
        "-Wthread-safety",
        "-Wunused",
        "-Wunreachable-code",
    ],
    required: [
        "bpfloader",
        "gpu_work.o",
    ],
}
+296 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#undef LOG_TAG
#define LOG_TAG "GpuWork"
#define ATRACE_TAG ATRACE_TAG_GRAPHICS

#include "gpuwork/GpuWork.h"

#include <android-base/stringprintf.h>
#include <bpf/WaitForProgsLoaded.h>
#include <libbpf.h>
#include <libbpf_android.h>
#include <log/log.h>
#include <unistd.h>
#include <utils/Timers.h>
#include <utils/Trace.h>

#include <chrono>
#include <cstdint>
#include <map>
#include <mutex>
#include <unordered_map>
#include <vector>

#include "gpuwork/gpu_work.h"

#define MS_IN_NS (1000000)

namespace android {
namespace gpuwork {

namespace {

// Gets a BPF map from |mapPath|.
template <class Key, class Value>
bool getBpfMap(const char* mapPath, bpf::BpfMap<Key, Value>* out) {
    errno = 0;
    auto map = bpf::BpfMap<Key, Value>(mapPath);
    if (!map.isValid()) {
        ALOGW("Failed to create bpf map from %s [%d(%s)]", mapPath, errno, strerror(errno));
        return false;
    }
    *out = std::move(map);
    return true;
}

} // namespace

using base::StringAppendF;

GpuWork::~GpuWork() {
    // If we created our clearer thread, then we must stop it and join it.
    if (mMapClearerThread.joinable()) {
        // Tell the thread to terminate.
        {
            std::scoped_lock<std::mutex> lock(mMutex);
            mIsTerminating = true;
            mIsTerminatingConditionVariable.notify_all();
        }

        // Now, we can join it.
        mMapClearerThread.join();
    }

    bpf_detach_tracepoint("power", "gpu_work_period");
}

void GpuWork::initialize() {
    // Make sure BPF programs are loaded.
    bpf::waitForProgsLoaded();

    // Get the BPF maps before trying to attach the BPF program; if we can't get
    // the maps then there is no point in attaching the BPF program.
    {
        std::lock_guard<std::mutex> lock(mMutex);

        if (!getBpfMap("/sys/fs/bpf/map_gpu_work_gpu_work_map", &mGpuWorkMap)) {
            return;
        }

        if (!getBpfMap("/sys/fs/bpf/map_gpu_work_gpu_work_global_data", &mGpuWorkGlobalDataMap)) {
            return;
        }
    }

    // Attach the tracepoint ONLY if we got the map above.
    if (!attachTracepoint("/sys/fs/bpf/prog_gpu_work_tracepoint_power_gpu_work_period", "power",
                          "gpu_work_period")) {
        return;
    }

    // Create the map clearer thread, and store it to |mMapClearerThread|.
    std::thread thread([this]() { periodicallyClearMap(); });

    mMapClearerThread.swap(thread);

    ALOGI("Initialized!");

    mInitialized.store(true);
}

void GpuWork::dump(const Vector<String16>& /* args */, std::string* result) {
    if (!mInitialized.load()) {
        result->append("GPU time in state information is not available.\n");
        return;
    }

    // Ordered map ensures output data is sorted by UID.
    std::map<Uid, UidTrackingInfo> dumpMap;

    {
        std::lock_guard<std::mutex> lock(mMutex);

        if (!mGpuWorkMap.isValid()) {
            result->append("GPU time in state map is not available.\n");
            return;
        }

        // Iteration of BPF hash maps can be unreliable (no data races, but elements
        // may be repeated), as the map is typically being modified by other
        // threads. The buckets are all preallocated. Our eBPF program only updates
        // entries (in-place) or adds entries. |GpuWork| only iterates or clears the
        // map while holding |mMutex|. Given this, we should be able to iterate over
        // all elements reliably. In the worst case, we might see elements more than
        // once.

        // Note that userspace reads of BPF maps make a copy of the value, and
        // thus the returned value is not being concurrently accessed by the BPF
        // program (no atomic reads needed below).

        mGpuWorkMap.iterateWithValue([&dumpMap](const Uid& key, const UidTrackingInfo& value,
                                                const android::bpf::BpfMap<Uid, UidTrackingInfo>&)
                                             -> base::Result<void> {
            dumpMap[key] = value;
            return {};
        });
    }

    // Find the largest frequency where some UID has spent time in that frequency.
    size_t largestFrequencyWithTime = 0;
    for (const auto& uidToUidInfo : dumpMap) {
        for (size_t i = largestFrequencyWithTime + 1; i < kNumTrackedFrequencies; ++i) {
            if (uidToUidInfo.second.frequency_times_ns[i] > 0) {
                largestFrequencyWithTime = i;
            }
        }
    }

    // Dump time in state information.
    // E.g.
    // uid/freq: 0MHz 50MHz 100MHz ...
    // 1000: 0 0 0 0 ...
    // 1003: 0 0 3456 0 ...
    // [errors:3]1006: 0 0 3456 0 ...

    // Header.
    result->append("GPU time in frequency state in ms.\n");
    result->append("uid/freq: 0MHz");
    for (size_t i = 1; i <= largestFrequencyWithTime; ++i) {
        StringAppendF(result, " %zuMHz", i * 50);
    }
    result->append("\n");

    for (const auto& uidToUidInfo : dumpMap) {
        if (uidToUidInfo.second.error_count) {
            StringAppendF(result, "[errors:%" PRIu32 "]", uidToUidInfo.second.error_count);
        }
        StringAppendF(result, "%" PRIu32 ":", uidToUidInfo.first);
        for (size_t i = 0; i <= largestFrequencyWithTime; ++i) {
            StringAppendF(result, " %" PRIu64,
                          uidToUidInfo.second.frequency_times_ns[i] / MS_IN_NS);
        }
        result->append("\n");
    }
}

bool GpuWork::attachTracepoint(const char* programPath, const char* tracepointGroup,
                               const char* tracepointName) {
    errno = 0;
    base::unique_fd fd(bpf::retrieveProgram(programPath));
    if (fd < 0) {
        ALOGW("Failed to retrieve pinned program from %s [%d(%s)]", programPath, errno,
              strerror(errno));
        return false;
    }

    // Attach the program to the tracepoint. The tracepoint is automatically enabled.
    errno = 0;
    int count = 0;
    while (bpf_attach_tracepoint(fd.get(), tracepointGroup, tracepointName) < 0) {
        if (++count > kGpuWaitTimeoutSeconds) {
            ALOGW("Failed to attach bpf program to %s/%s tracepoint [%d(%s)]", tracepointGroup,
                  tracepointName, errno, strerror(errno));
            return false;
        }
        // Retry until GPU driver loaded or timeout.
        sleep(1);
        errno = 0;
    }

    return true;
}

void GpuWork::periodicallyClearMap() {
    std::unique_lock<std::mutex> lock(mMutex);

    auto previousTime = std::chrono::steady_clock::now();

    while (true) {
        if (mIsTerminating) {
            break;
        }
        auto nextTime = std::chrono::steady_clock::now();
        auto differenceSeconds =
                std::chrono::duration_cast<std::chrono::seconds>(nextTime - previousTime);
        if (differenceSeconds.count() > kMapClearerWaitDurationSeconds) {
            // It has been >1 hour, so clear the map, if needed.
            clearMapIfNeeded();
            // We only update |previousTime| if we actually checked the map.
            previousTime = nextTime;
        }
        // Sleep for ~1 hour. It does not matter if we don't check the map for 2
        // hours.
        mIsTerminatingConditionVariable.wait_for(lock,
                                                 std::chrono::seconds{
                                                         kMapClearerWaitDurationSeconds});
    }
}

void GpuWork::clearMapIfNeeded() {
    if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
        ALOGW("Map clearing could not occur because we are not initialized properly");
        return;
    }

    base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
    if (!globalData.ok()) {
        ALOGW("Could not read BPF global data map entry");
        return;
    }

    // Note that userspace reads of BPF maps make a copy of the value, and thus
    // the return value is not being concurrently accessed by the BPF program
    // (no atomic reads needed below).

    uint64_t numEntries = globalData.value().num_map_entries;

    // If the map is <=75% full, we do nothing.
    if (numEntries <= (kMaxTrackedUids / 4) * 3) {
        return;
    }

    // Iterating BPF maps to delete keys is tricky. If we just repeatedly call
    // |getFirstKey()| and delete that, we may loop forever (or for a long time)
    // because our BPF program might be repeatedly re-adding UID keys. Also,
    // even if we limit the number of elements we try to delete, we might only
    // delete new entries, leaving old entries in the map. If we delete a key A
    // and then call |getNextKey(A)|, the first key in the map is returned, so
    // we have the same issue.
    //
    // Thus, we instead get the next key and then delete the previous key. We
    // also limit the number of deletions we try, just in case.

    base::Result<Uid> key = mGpuWorkMap.getFirstKey();

    for (size_t i = 0; i < kMaxTrackedUids; ++i) {
        if (!key.ok()) {
            break;
        }
        base::Result<Uid> previousKey = key;
        key = mGpuWorkMap.getNextKey(previousKey.value());
        mGpuWorkMap.deleteValue(previousKey.value());
    }

    // Reset our counter; |globalData| is a copy of the data, so we have to use
    // |writeValue|.
    globalData.value().num_map_entries = 0;
    mGpuWorkGlobalDataMap.writeValue(0, globalData.value(), BPF_ANY);
}

} // namespace gpuwork
} // namespace android
Loading