Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2a390dbe authored by Andy Hung's avatar Andy Hung
Browse files

Spatial Audio: Roll pitch yaw logging.

Add roll, pitch, yaw angle logging.

Test: atest libheadtracking-test
Test: check dumpsys, use head tracker
Bug: 269620212
Bug: 269683154
Merged-In: Iaa0249b8498a0b5d4e72e777d62036096e643f37
Change-Id: Iaa0249b8498a0b5d4e72e777d62036096e643f37
parent 82a4eab1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -88,6 +88,7 @@ cc_test_host {
    ],
    shared_libs: [
        "libaudioutils",
        "libbase", // StringAppendF
        "libheadtracking",
    ],
}
+86 −0
Original line number Diff line number Diff line
@@ -51,6 +51,92 @@ TEST(QuaternionUtil, RoundTripFromVector) {
    EXPECT_EQ(vec, quaternionToRotationVector(rotationVectorToQuaternion(vec)));
}

// Absolute tolerance, in radians, passed to EXPECT_NEAR for the angle comparisons below.
// Float precision necessitates a tolerance this loose (1e-4f fails).
constexpr float NEAR = 1e-3f;

TEST(QuaternionUtil, quaternionToAngles_basic) {
    float pitch;
    float roll;
    float yaw;

    // Sweep 11 evenly spaced angles over [-M_PI / 2, M_PI / 2], rotating about a
    // single axis at a time.  The angle of the rotated axis must be reported as-is
    // while the two other angles stay at zero.
    for (int i = -5; i <= 5; ++i) {
        const float theta = M_PI * i * 0.1f;

        // Rotation about X only: shows up as pitch.
        const auto aboutX = rotationVectorToQuaternion({theta, 0.f, 0.f});
        quaternionToAngles(aboutX, &pitch, &roll, &yaw);
        EXPECT_NEAR(theta, pitch, NEAR);
        EXPECT_NEAR(0.f, roll, NEAR);
        EXPECT_NEAR(0.f, yaw, NEAR);

        // Rotation about Y only: shows up as roll.
        const auto aboutY = rotationVectorToQuaternion({0.f, theta, 0.f});
        quaternionToAngles(aboutY, &pitch, &roll, &yaw);
        EXPECT_NEAR(0.f, pitch, NEAR);
        EXPECT_NEAR(theta, roll, NEAR);
        EXPECT_NEAR(0.f, yaw, NEAR);

        // Rotation about Z only: shows up as yaw.
        const auto aboutZ = rotationVectorToQuaternion({0.f, 0.f, theta});
        quaternionToAngles(aboutZ, &pitch, &roll, &yaw);
        EXPECT_NEAR(0.f, pitch, NEAR);
        EXPECT_NEAR(0.f, roll, NEAR);
        EXPECT_NEAR(theta, yaw, NEAR);
    }

    // The DEBUG instantiation must additionally hand back a non-empty string.
    const auto halfTurnAboutX = rotationVectorToQuaternion({M_PI, 0.f, 0.f});
    const std::string debugText =
            quaternionToAngles<true /* DEBUG */>(halfTurnAboutX, &pitch, &roll, &yaw);
    ASSERT_FALSE(debugText.empty());
}

TEST(QuaternionUtil, quaternionToAngles_zaxis) {
    float pitch;
    float roll;
    float yaw;

    // Sweep the rotation about the world Z axis over [-M_PI, M_PI].
    for (int zStep = -10; zStep <= 10; ++zStep) {
        const float zAngle = M_PI * zStep * 0.1f;

        // Pitch must not depend on the world Z rotation.
        //
        // The pitch limits of +-M_PI/2 are deliberately left out: roll can become
        // degenerate there and atan2(0, 0) may report 0, PI, or -PI.
        for (int k = -4; k <= 4; ++k) {
            const float tilt = M_PI * k * 0.1f;
            const auto pitched = rotationVectorToQuaternion({tilt, 0.f, 0.f});
            const auto aboutWorldZ = rotationVectorToQuaternion({0.f, 0.f, zAngle});

            // Active rotations applied one after another in the world frame
            // compose as R_2 * R_1.
            quaternionToAngles(aboutWorldZ * pitched, &pitch, &roll, &yaw);

            EXPECT_NEAR(tilt, pitch, NEAR);
            EXPECT_NEAR(0.f, roll, NEAR);
        }

        // Roll must not depend on the world Z rotation either.
        for (int k = -5; k <= 5; ++k) {
            const float tilt = M_PI * k * 0.1f;
            const auto rolled = rotationVectorToQuaternion({0.f, tilt, 0.f});
            const auto aboutWorldZ = rotationVectorToQuaternion({0.f, 0.f, zAngle});

            // Extrinsic composition: active rotations in the world frame
            // compose as R_2 * R_1.
            quaternionToAngles(aboutWorldZ * rolled, &pitch, &roll, &yaw);

            EXPECT_NEAR(0.f, pitch, NEAR);
            EXPECT_NEAR(tilt, roll, NEAR);

            // Express the same extrinsic (world-based) active rotation as an
            // intrinsic one, i.e. relative to the frame produced by the previous
            // rotation:
            //
            //   R_1 * R_intrinsic = R_extrinsic * R_1
            //     implies
            //   R_intrinsic = (R_1)^-1 R_extrinsic R_1
            //
            const auto aboutWorldZIntrinsic = rotationVectorToQuaternion(
                    rolled.inverse() * Vector3f(0.f, 0.f, zAngle));

            // Intrinsic composition: sequential rotations compose as R_1 * R_2.
            quaternionToAngles(rolled * aboutWorldZIntrinsic, &pitch, &roll, &yaw);

            EXPECT_NEAR(0.f, pitch, NEAR);
            EXPECT_NEAR(tilt, roll, NEAR);
        }
    }
}

}  // namespace
}  // namespace media
}  // namespace android
+241 −0
Original line number Diff line number Diff line
@@ -15,7 +15,9 @@
 */
#pragma once

#include <algorithm>
#include <cmath>
#include <string>

#include <android-base/stringprintf.h>
#include <Eigen/Geometry>
#include <media/Pose.h>

namespace android {
namespace media {
@@ -52,5 +54,244 @@ Eigen::Quaternionf rotateY(float angle);
 */
Eigen::Quaternionf rotateZ(float angle);

/**
 * Compute separate roll, pitch, and yaw angles from a quaternion
 *
 * The roll, pitch, and yaw follow standard 3DOF virtual reality definitions
 * with angles increasing counter-clockwise by the right hand rule.
 *
 * https://en.wikipedia.org/wiki/Six_degrees_of_freedom
 *
 * The roll, pitch, and yaw angles are calculated separately from the device frame
 * rotation from the world frame.  This is not to be confused with the
 * intrinsic Euler xyz roll, pitch, yaw 'nautical' angles.
 *
 * The input quaternion is the active rotation that transforms the
 * World/Stage frame to the Head/Screen frame.
 *
 * The input quaternion may come from two principal sensors: DEVICE and HEADSET
 * and are interpreted as below.
 *
 * DEVICE SENSOR
 *
 * Android sensor stack assumes device coordinates along the x/y axis.
 *
 * https://developer.android.com/reference/android/hardware/SensorEvent#sensor.type_rotation_vector:
 *
 * Looking down from the clouds. Android Device coordinate system (not used)
 *        DEVICE --> X (Y goes through top speaker towards the observer)
 *           | Z
 *           V
 *         USER
 *
 * Internally within this library, we transform the device sensor coordinate
 * system by rotating the coordinate system around the X axis by -M_PI/2.
 * This aligns the device coordinate system to match that of the
 * Head Tracking sensor (see below), should the user be facing the device in
 * natural (phone == portrait, tablet == ?) orientation.
 *
 * Looking down from the clouds. Spatializer device frame.
 *           Y
 *           ^
 *           |
 *        DEVICE --> X (Z goes through top of the DEVICE towards the observer)
 *
 *         USER
 *
 * The reference world frame is the device in vertical
 * natural (phone == portrait) orientation with the top pointing straight
 * up from the ground and the front-to-back direction facing north.
 * The world frame is presumed locally fixed by magnetic and gravitational reference.
 *
 * HEADSET SENSOR
 * https://developer.android.com/reference/android/hardware/SensorEvent#sensor.type_head_tracker:
 *
 * Looking down from the clouds. Headset frame.
 *           Y
 *           ^
 *           |
 *         USER ---> X
 *         (Z goes through the top of the USER head towards the observer)
 *
 * The Z axis goes from the neck to the top of the head, the X axis goes
 * from the left ear to the right ear, the Y axis goes from the back of the
 * head through the nose.
 *
 * Typically for a headset sensor, the X and Y axes have some arbitrary fixed
 * reference.
 *
 * ROLL
 * Roll is the counter-clockwise L/R motion around the Y axis (hence ZX plane).
 * The right hand convention means the plane is ZX not XZ.
 * This can be considered the azimuth angle in spherical coordinates
 * with Pitch being the elevation angle.
 *
 * Roll has a range of -M_PI to M_PI radians.
 *
 * Rolling a device changes between portrait and landscape
 * modes, and for L/R speakers will limit the amount of crosstalk cancellation.
 * Roll increases as the device (if vertical like a coin) rolls from left to right.
 *
 * By this definition, Roll is less accurate when the device is flat
 * on a table rather than standing on edge.
 * When perfectly flat on the table, roll may report as 0, M_PI, or -M_PI
 * due to the ambiguity / degeneracy of atan2(0, 0) in this case (the device Y axis
 * aligns with the world Z axis), but exactly flat rarely occurs.
 *
 * Roll for a headset is the angle the head is inclined to the right side
 * (like sleeping).
 *
 * PITCH
 * Pitch is the Surface normal Y deviation (along the Z axis away from the earth).
 * This can be considered the elevation angle in spherical coordinates using
 * Roll as the azimuth angle.
 *
 * Pitch for a device determines whether the device is "upright" or lying
 * flat on the table (i.e. surface normal).  Pitch is 0 when upright, decreases
 * as the device top moves away from the user to -M_PI/2 when lying down face up.
 * Pitch increases from 0 to M_PI/2 when the device tilts towards the user, and is
 * M_PI/2 radians when face down.
 *
 * Pitch for a headset is the user tilting the head/chin up or down,
 * like nodding.
 *
 * Pitch has a range of -M_PI/2 to M_PI/2 radians.
 *
 * YAW
 * Yaw is the rotational component along the earth's XY tangential plane,
 * where the Z axis points radially away from the earth.
 *
 * Yaw has a range of -M_PI to M_PI radians.  If used for azimuth angle in
 * spherical coordinates, the elevation angle may be derived from the Z axis.
 *
 * A positive increase means the phone is rotating from right to left
 * when considered flat on the table.
 * (headset: the user is rotating their head to look left).
 * If left speaker or right earbud is pointing straight up or down,
 * this value is imprecise and Pitch or Roll is a more useful measure.
 *
 * Yaw for a device is like spinning a vertical device along the axis of
 * gravity, like spinning a coin.  Yaw increases as the coin / device
 * spins from right to left, rotating around the Z axis.
 *
 * Yaw for a headset is the user turning the head to look left or right
 * like shaking the head for no. Yaw is the primary angle for a binaural
 * head tracking device.
 *
 * @param q input active rotation Eigen quaternion.
 * @param pitch output set to pitch if not nullptr
 * @param roll output set to roll if not nullptr
 * @param yaw output set to yaw if not nullptr
 * @return (DEBUG==true) a debug string with intermediate transformation matrix
 *                       interpreted as the unit basis vectors.
 */

// Computes pitch, roll, and yaw (in radians) from the active rotation q.
// Each of pitch, roll, and yaw is written only if its pointer is not nullptr.
//
// DEBUG returns a debug string for analysis, holding the world coordinates of the
// unit basis vectors as space separated "ux: ... uy: ... uz: ..." fields.
// When DEBUG is false, nothing is returned.
// We save unneeded rotation matrix computation by keeping the DEBUG option constexpr.
template <bool DEBUG = false>
auto quaternionToAngles(const Eigen::Quaternionf& q, float *pitch, float *roll, float *yaw) {
    /*
     * The quaternion here is the active rotation that transforms from the world frame
     * to the device frame: the observer remains in the world frame,
     * and the device (frame) moves.
     *
     * We use this to map device coordinates to world coordinates.
     *
     * Device:  We transform the device right speaker (X == 1), top speaker (Z == 1),
     * and surface inwards normal (Y == 1) positions to the world frame.
     *
     * Headset: We transform the headset right bud (X == 1), top (Z == 1) and
     * nose normal (Y == 1) positions to the world frame.
     *
     * This is the same as the world frame coordinates of the
     *  unit device vector in the X dimension (ux),
     *  unit device vector in the Y dimension (uy),
     *  unit device vector in the Z dimension (uz).
     *
     * Rather than doing the rotation on unit vectors individually,
     * one can simply use the columns of the rotation matrix of
     * the world-to-body quaternion, so the computation is exceptionally fast.
     *
     * Furthermore, Eigen inlines the "toRotationMatrix" method
     * and we rely on unused expression removal for efficiency
     * and any elements not used should not be computed.
     *
     * Side note: For applying a rotation to several points,
     * it is more computationally efficient to extract and
     * use the rotation matrix form than the quaternion.
     * So use of the rotation matrix is good for many reasons.
     */
    const auto rotation = q.toRotationMatrix();

    /*
     * World location of unit vector right speaker assuming the phone is situated
     * in natural (phone == portrait) mode.
     * (headset: right bud).
     *
     * auto ux = q * Eigen::Vector3f{1.f, 0.f, 0.f};
     *         = rotation.col(0);
     */
    [[maybe_unused]] const auto ux_0 = rotation.coeff(0, 0);
    [[maybe_unused]] const auto ux_1 = rotation.coeff(1, 0);
    [[maybe_unused]] const auto ux_2 = rotation.coeff(2, 0);

    [[maybe_unused]] std::string coordinates;
    if constexpr (DEBUG) {
        base::StringAppendF(&coordinates, "ux: %f %f %f", ux_0, ux_1, ux_2);
    }

    /*
     * World location of screen-inwards normal assuming the phone is situated
     * in natural (phone == portrait) mode.
     * (headset: user nose).
     *
     * auto uy = q * Eigen::Vector3f{0.f, 1.f, 0.f};
     *         = rotation.col(1);
     */
    [[maybe_unused]] const auto uy_0 = rotation.coeff(0, 1);
    [[maybe_unused]] const auto uy_1 = rotation.coeff(1, 1);
    [[maybe_unused]] const auto uy_2 = rotation.coeff(2, 1);
    if constexpr (DEBUG) {
        // Leading space keeps this field apart from the preceding "ux" field.
        base::StringAppendF(&coordinates, " uy: %f %f %f", uy_0, uy_1, uy_2);
    }

    /*
     * World location of unit vector top speaker.
     * (headset: top of head).
     *
     * auto uz = q * Eigen::Vector3f{0.f, 0.f, 1.f};
     *         = rotation.col(2);
     */
    [[maybe_unused]] const auto uz_0 = rotation.coeff(0, 2);
    [[maybe_unused]] const auto uz_1 = rotation.coeff(1, 2);
    [[maybe_unused]] const auto uz_2 = rotation.coeff(2, 2);
    if constexpr (DEBUG) {
        // Leading space keeps this field apart from the preceding "uy" field.
        base::StringAppendF(&coordinates, " uz: %f %f %f", uz_0, uz_1, uz_2);
    }

    // pitch computed from nose world Z coordinate;
    // hence independent of rotation around world Z.
    if (pitch != nullptr) {
        // Clamp first: rounding may push the coefficient marginally outside
        // [-1, 1], for which asin would return NaN.
        *pitch = std::asin(std::clamp(uy_2, -1.f, 1.f));
    }

    // roll computed from head/right world Z coordinate;
    // hence independent of rotation around world Z.
    if (roll != nullptr) {
        // atan2 takes care of implicit scale normalization of Z, X.
        *roll = -std::atan2(ux_2, uz_2);
    }

    // yaw computed from right ear angle projected onto world XY plane
    // where world Z == 0.  This is the rotation around world Z.
    if (yaw != nullptr) {
        // atan2 takes care of implicit scale normalization of X, Y.
        *yaw = std::atan2(ux_1, ux_0);
    }

    // Only the DEBUG instantiation has a return value (the deduced type is
    // std::string); otherwise the deduced return type is void.
    if constexpr (DEBUG) {
        return coordinates;
    }
}

}  // namespace media
}  // namespace android
+27 −17
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <media/stagefright/foundation/AHandler.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/MediaMetricsItem.h>
#include <media/QuaternionUtil.h>
#include <media/ShmemCompat.h>
#include <mediautils/SchedulingPolicyService.h>
#include <mediautils/ServiceUtilities.h>
@@ -75,13 +76,21 @@ static audio_channel_mask_t getMaxChannelMask(
    return maxMask;
}

std::vector<float> recordFromRotationVector(const std::vector<float>& rotationVector) {
static std::vector<float> recordFromTranslationRotationVector(
        const std::vector<float>& trVector) {
    auto headToStageOpt = Pose3f::fromVector(trVector);
    if (!headToStageOpt) return {};

    const auto stageToHead = headToStageOpt.value().inverse();
    const auto stageToHeadTranslation = stageToHead.translation();
    constexpr float RAD_TO_DEGREE = 180.f / M_PI;
    std::vector<float> record{
        rotationVector[0], rotationVector[1], rotationVector[2],
        rotationVector[3] * RAD_TO_DEGREE,
        rotationVector[4] * RAD_TO_DEGREE,
        rotationVector[5] * RAD_TO_DEGREE};
        stageToHeadTranslation[0], stageToHeadTranslation[1], stageToHeadTranslation[2],
        0.f, 0.f, 0.f};
    media::quaternionToAngles(stageToHead.rotation(), &record[3], &record[4], &record[5]);
    record[3] *= RAD_TO_DEGREE;
    record[4] *= RAD_TO_DEGREE;
    record[5] *= RAD_TO_DEGREE;
    return record;
}

@@ -747,8 +756,9 @@ void Spatializer::onHeadToStagePoseMsg(const std::vector<float>& headToStage) {
        callback = mHeadTrackingCallback;
        if (mEngine != nullptr) {
            setEffectParameter_l(SPATIALIZER_PARAM_HEAD_TO_STAGE, headToStage);
            mPoseRecorder.record(headToStage);
            mPoseDurableRecorder.record(headToStage);
            const auto record = recordFromTranslationRotationVector(headToStage);
            mPoseRecorder.record(record);
            mPoseDurableRecorder.record(record);
        }
    }

@@ -1024,8 +1034,7 @@ void Spatializer::postFramesProcessedMsg(int frames) {
}

std::string Spatializer::toString(unsigned level) const {
    std::string prefixSpace;
    prefixSpace.append(level, ' ');
    std::string prefixSpace(level, ' ');
    std::string ss = prefixSpace + "Spatializer:\n";
    bool needUnlock = false;

@@ -1081,14 +1090,15 @@ std::string Spatializer::toString(unsigned level) const {

    // PostController dump.
    if (mPoseController != nullptr) {
        ss += mPoseController->toString(level + 1);
        ss.append(prefixSpace +
                  "Sensor data format - [rx, ry, rz, vx, vy, vz] (units-degree, "
                  "r-transform, v-angular velocity, x-pitch, y-roll, z-yaw):\n");
        ss.append(prefixSpace + " PerMinuteHistory:\n");
        ss += mPoseDurableRecorder.toString(level + 1);
        ss.append(prefixSpace + " PerSecondHistory:\n");
        ss += mPoseRecorder.toString(level + 1);
        ss.append(mPoseController->toString(level + 1))
            .append(prefixSpace)
            .append("Pose (active stage-to-head) [tx, ty, tz, pitch, roll, yaw]:\n")
            .append(prefixSpace)
            .append(" PerMinuteHistory:\n")
            .append(mPoseDurableRecorder.toString(level + 2))
            .append(prefixSpace)
            .append(" PerSecondHistory:\n")
            .append(mPoseRecorder.toString(level + 2));
    } else {
        ss.append(prefixSpace).append("SpatializerPoseController not exist\n");
    }