Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bb996561 authored by Roozbeh Pournader's avatar Roozbeh Pournader Committed by Android (Google) Code Review
Browse files

Merge "Parse BCP 47 locale names with Unicode extensions"

parents ad5379bd e7bc60a9
Loading
Loading
Loading
Loading
+115 −13
Original line number Diff line number Diff line
@@ -2847,14 +2847,111 @@ void ResTable_config::getBcp47Locale(char str[RESTABLE_MAX_LOCALE_LEN], bool can
        }
        memcpy(str + charsWritten, localeVariant, sizeof(localeVariant));
    }
}

/* static */ inline bool assignLocaleComponent(ResTable_config* config,
        const char* start, size_t size) {
    /* TODO: Add BCP47 extension. It requires RESTABLE_MAX_LOCALE_LEN
     * increase from 28 to 42 bytes (-u-nu-xxxxxxxx) */
}

/* State carried between successive calls to assignLocaleComponent() while a
 * BCP 47 locale string is consumed one '-'-separated subtag at a time. The
 * state is passed and returned by value; a default-constructed instance
 * starts in BASE / NO_KEY. */
struct LocaleParserState {
    /* Top-level position in the locale string. */
    enum State : uint8_t {
        /* Subtags that precede any extension singleton. */
        BASE,
        /* The single-character subtag 'u' (any case) was seen; the subtags
         * that follow are interpreted according to UnicodeState. */
        UNICODE_EXTENSION,
        /* Parsing must stop: the input is malformed or uses a feature that
         * is not supported. */
        IGNORE_THE_REST
    };

    /* Position inside the Unicode extension. Only meaningful while the
     * top-level state is UNICODE_EXTENSION. */
    enum UnicodeState : uint8_t {
        /* Right after the Unicode singleton. A 2-character subtag is taken
         * as a key; a 3..8-character subtag is taken as an attribute and
         * leaves the state unchanged.
         * Next states: NO_KEY, IGNORE_KEY or NUMBERING_SYSTEM. */
        NO_KEY,
        /* A key's value has just been consumed, so only another key (and no
         * attribute) may follow.
         * Next states: IGNORE_KEY or NUMBERING_SYSTEM. */
        EXPECT_KEY,
        /* An unsupported key was seen; its value is skipped.
         * Next state: EXPECT_KEY. */
        IGNORE_KEY,
        /* The 'nu' (numbering system) key was seen; the next subtag is
         * stored in the configuration's localeNumberingSystem field.
         * Next state: EXPECT_KEY. */
        NUMBERING_SYSTEM
    };

    State parserState = BASE;
    UnicodeState unicodeState = NO_KEY;
};

/* static */ inline LocaleParserState assignLocaleComponent(ResTable_config* config,
        const char* start, size_t size, LocaleParserState state) {

    /* It is assumed that this function is not invoked with state.parserState
     * set to IGNORE_THE_REST. The condition is checked by setBcp47Locale
     * function. */

    if (state.parserState == LocaleParserState::UNICODE_EXTENSION) {
        switch (size) {
            case 1:
                /* Other BCP 47 extensions are not supported at the moment */
                state.parserState = LocaleParserState::IGNORE_THE_REST;
                break;
            case 2:
                if (state.unicodeState == LocaleParserState::NO_KEY ||
                    state.unicodeState == LocaleParserState::EXPECT_KEY) {
                    /* Analyze the Unicode extension key. Currently only 'nu'
                     * (numbering system) is supported. */
                    if ((start[0] == 'n' || start[0] == 'N') &&
                        (start[1] == 'u' || start[1] == 'U')) {
                        state.unicodeState = LocaleParserState::NUMBERING_SYSTEM;
                    } else {
                        state.unicodeState = LocaleParserState::IGNORE_KEY;
                    }
                } else {
                    /* Keys are not allowed in any other state; ignore the rest. */
                    state.parserState = LocaleParserState::IGNORE_THE_REST;
                }
                break;
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
            case 8:
                switch (state.unicodeState) {
                    case LocaleParserState::NUMBERING_SYSTEM:
                        /* Accept only the first occurrence of the numbering system. */
                        if (config->localeNumberingSystem[0] == '\0') {
                            for (size_t i = 0; i < size; ++i) {
                               config->localeNumberingSystem[i] = tolower(start[i]);
                            }
                            state.unicodeState = LocaleParserState::EXPECT_KEY;
                        } else {
                            state.parserState = LocaleParserState::IGNORE_THE_REST;
                        }
                        break;
                    case LocaleParserState::IGNORE_KEY:
                        /* Unsupported Unicode keyword. Ignore. */
                        state.unicodeState = LocaleParserState::EXPECT_KEY;
                        break;
                    case LocaleParserState::EXPECT_KEY:
                        /* A keyword followed by an attribute is not allowed. */
                        state.parserState = LocaleParserState::IGNORE_THE_REST;
                        break;
                    case LocaleParserState::NO_KEY:
                        /* Extension attribute. Do nothing. */
                        break;
                    default:
                        break;
                }
                break;
            default:
                /* Unexpected field length - ignore the rest and treat as an error */
                state.parserState = LocaleParserState::IGNORE_THE_REST;
        }
        return state;
    }

  switch (size) {
       case 0:
           return false;
           state.parserState = LocaleParserState::IGNORE_THE_REST;
           break;
       case 1:
           state.parserState = (start[0] == 'u' || start[0] == 'U')
                   ? LocaleParserState::UNICODE_EXTENSION
                   : LocaleParserState::IGNORE_THE_REST;
           break;
       case 2:
       case 3:
           config->language[0] ? config->packRegion(start) : config->packLanguage(start);
@@ -2878,30 +2975,35 @@ void ResTable_config::getBcp47Locale(char str[RESTABLE_MAX_LOCALE_LEN], bool can
           }
           break;
       default:
           return false;
           state.parserState = LocaleParserState::IGNORE_THE_REST;
  }

  return true;
  return state;
}

void ResTable_config::setBcp47Locale(const char* in) {
    locale = 0;
    memset(localeScript, 0, sizeof(localeScript));
    memset(localeVariant, 0, sizeof(localeVariant));
    memset(localeNumberingSystem, 0, sizeof(localeNumberingSystem));

    const char* separator = in;
    const char* start = in;
    while ((separator = strchr(start, '-')) != NULL) {
    LocaleParserState state;
    while (const char* separator = strchr(start, '-')) {
        const size_t size = separator - start;
        if (!assignLocaleComponent(this, start, size)) {
            fprintf(stderr, "Invalid BCP-47 locale string: %s", in);
        state = assignLocaleComponent(this, start, size, state);
        if (state.parserState == LocaleParserState::IGNORE_THE_REST) {
            fprintf(stderr, "Invalid BCP-47 locale string: %s\n", in);
            break;
        }

        start = (separator + 1);
    }

    const size_t size = in + strlen(in) - start;
    assignLocaleComponent(this, start, size);
    if (state.parserState != LocaleParserState::IGNORE_THE_REST) {
        const size_t size = strlen(start);
        assignLocaleComponent(this, start, size, state);
    }

    localeScriptWasComputed = (localeScript[0] == '\0');
    if (localeScriptWasComputed) {
        computeScript();
+8 −3
Original line number Diff line number Diff line
@@ -1182,6 +1182,10 @@ struct ResTable_config
    // tried but could not compute a script.
    bool localeScriptWasComputed;

    // The value of the BCP 47 Unicode extension key 'nu' (numbering system).
    // Holds 3 to 8 characters, zero-padded at the end; a value that is exactly
    // 8 characters long is NOT NUL-terminated.
    char localeNumberingSystem[8];

    void copyFromDeviceNoSwap(const ResTable_config& o);
    
    void copyFromDtoH(const ResTable_config& o);
@@ -1259,9 +1263,9 @@ struct ResTable_config
    // variants, it will be a modified bcp47 tag: b+en+Latn+US.
    void appendDirLocale(String8& str) const;

    // Sets the values of language, region, script and variant to the
    // well formed BCP-47 locale contained in |in|. The input locale is
    // assumed to be valid and no validation is performed.
    // Sets the values of language, region, script, variant and numbering
    // system to the well formed BCP 47 locale contained in |in|.
    // The input locale is assumed to be valid and no validation is performed.
    void setBcp47Locale(const char* in);

    inline void clearLocale() {
@@ -1269,6 +1273,7 @@ struct ResTable_config
        localeScriptWasComputed = false;
        memset(localeScript, 0, sizeof(localeScript));
        memset(localeVariant, 0, sizeof(localeVariant));
        memset(localeNumberingSystem, 0, sizeof(localeNumberingSystem));
    }

    inline void computeScript() {
+42 −0
Original line number Diff line number Diff line
@@ -185,6 +185,7 @@ TEST(ConfigLocaleTest, setLocale) {
    EXPECT_TRUE(test.localeScriptWasComputed);
    EXPECT_EQ(0, memcmp("Latn", test.localeScript, 4));
    EXPECT_EQ(0, test.localeVariant[0]);
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("eng-419");
    char out[4] = {1, 1, 1, 1};
@@ -198,6 +199,7 @@ TEST(ConfigLocaleTest, setLocale) {
    EXPECT_EQ('4', out[0]);
    EXPECT_EQ('1', out[1]);
    EXPECT_EQ('9', out[2]);
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("en-Latn-419");
    EXPECT_EQ('e', test.language[0]);
@@ -209,6 +211,7 @@ TEST(ConfigLocaleTest, setLocale) {
    EXPECT_EQ('4', out[0]);
    EXPECT_EQ('1', out[1]);
    EXPECT_EQ('9', out[2]);
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("de-1901");
    memset(out, 1, 4);
@@ -222,6 +225,7 @@ TEST(ConfigLocaleTest, setLocale) {
    test.unpackRegion(out);
    EXPECT_EQ('\0', out[0]);
    EXPECT_EQ(0, strcmp("1901", test.localeVariant));
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("de-Latn-1901");
    memset(out, 1, 4);
@@ -235,6 +239,44 @@ TEST(ConfigLocaleTest, setLocale) {
    test.unpackRegion(out);
    EXPECT_EQ('\0', out[0]);
    EXPECT_EQ(0, strcmp("1901", test.localeVariant));
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("ar-EG-u-nu-latn");
    EXPECT_EQ('a', test.language[0]);
    EXPECT_EQ('r', test.language[1]);
    EXPECT_EQ('E', test.country[0]);
    EXPECT_EQ('G', test.country[1]);
    EXPECT_TRUE(test.localeScriptWasComputed);
    EXPECT_EQ(0, memcmp("Arab", test.localeScript, 4));
    EXPECT_EQ(0, test.localeVariant[0]);
    EXPECT_EQ(0, memcmp("latn", test.localeNumberingSystem, 4));

    test.setBcp47Locale("ar-EG-u");
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("ar-EG-u-nu");
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("ar-EG-u-attr-nu-latn");
    EXPECT_EQ(0, memcmp("latn", test.localeNumberingSystem, 4));

    test.setBcp47Locale("ar-EG-u-ca-gregory-nu-latn");
    EXPECT_EQ(0, memcmp("latn", test.localeNumberingSystem, 4));

    test.setBcp47Locale("ar-EG-u-nu-latn-ca-gregory");
    EXPECT_EQ(0, memcmp("latn", test.localeNumberingSystem, 4));

    test.setBcp47Locale("ar-EG-u-nu-toolongnumsys");
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("ar-EG-u-nu-latn-nu-arab");
    EXPECT_EQ(0, memcmp("latn", test.localeNumberingSystem, 4));

    test.setBcp47Locale("ar-EG-u-co-nu-latn");
    EXPECT_EQ(0, test.localeNumberingSystem[0]);

    test.setBcp47Locale("ar-u-co-abcd-attr-nu-latn");
    EXPECT_EQ(0, test.localeNumberingSystem[0]);
}

TEST(ConfigLocaleTest, computeScript) {