Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4a2c17f7 authored by caozhiyuan, committed by Alvin Cao
Browse files

libmedia: Improve charset detection.



Tags are first concatenated and the charset is detected on the combined
text. When different tags are encoded in different charsets, the combined
result can be very poor. So when the best combined match is not good
enough, we should not accept it as is; instead, we give each tag another
chance by detecting its charset separately.

For the test samples we collected, 6 songs out of 13 are now displayed
correctly.

Test: transfer MP3 files to a device and check that the tags' charsets are
detected correctly.
Signed-off-by: caozhiyuan <cao.zhiyuan@zte.com.cn>

Change-Id: I90d11a612b9f9e8896696df85635c4a46d067f09
parent 7b2b2a49
Loading
Loading
Loading
Loading
+20 −4
Original line number Diff line number Diff line
@@ -85,6 +85,8 @@ void CharacterEncodingDetector::detectAndConvert() {
        UErrorCode status = U_ZERO_ERROR;
        UCharsetDetector *csd = ucsdet_open(&status);
        const UCharsetMatch *ucm;
        bool goodmatch = true;
        int highest = 0;

        // try combined detection of artist/album/title etc.
        char buf[1024];
@@ -116,8 +118,6 @@ void CharacterEncodingDetector::detectAndConvert() {
            ucsdet_setText(csd, buf, strlen(buf), &status);
            int32_t matches;
            const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status);
            bool goodmatch = true;
            int highest = 0;
            const UCharsetMatch* bestCombinedMatch = getPreferred(buf, strlen(buf),
                    ucma, matches, &goodmatch, &highest);

@@ -180,8 +180,24 @@ void CharacterEncodingDetector::detectAndConvert() {
                    !strcmp(name, "genre") ||
                    !strcmp(name, "album") ||
                    !strcmp(name, "title"))) {
                if (!goodmatch && highest < 0) {
                    // Give it one more chance if there is no good match.
                    ALOGV("Trying to detect %s separately", name);
                    int32_t matches;
                    bool goodmatchSingle = true;
                    int highestSingle = 0;
                    ucsdet_setText(csd, s, inputLength, &status);
                    const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status);
                    const UCharsetMatch* bestSingleMatch = getPreferred(s, inputLength,
                            ucma, matches, &goodmatchSingle, &highestSingle);
                    if (goodmatchSingle || highestSingle > highest)
                        enc = ucsdet_getName(bestSingleMatch, &status);
                    else
                        enc = combinedenc;
                } else {
                    // use encoding determined from the combination of artist/album/title etc.
                    enc = combinedenc;
                }
            } else {
                if (isPrintableAscii(s, inputLength)) {
                    enc = "UTF-8";