Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a9886c58 authored by Kenny Root's avatar Kenny Root
Browse files

Totally remove Unicode.cpp and rely on ICU

Unicode.cpp used a packed data table for character data that essentially
duplicated ICU's functionality.

Change-Id: Ia68fe4ac94e89dc68d9a3f45f33f6e648a5500b7
parent 7f66d6e6
Loading
Loading
Loading
Loading
+37 −7
Original line number Diff line number Diff line
@@ -20,8 +20,32 @@
#include <jni.h>
#include <android_runtime/AndroidRuntime.h>
#include "utils/misc.h"
#include "utils/AndroidUnicode.h"
#include "utils/Log.h"
#include "unicode/uchar.h"

#define DIRECTIONALITY_UNDEFINED (-1)
// ICU => JDK mapping
//
// Lookup table indexed by the ICU UCharDirection value returned by
// u_charDirection(); each entry is the numeric value of the corresponding
// java.lang.Character DIRECTIONALITY_* constant. Callers must range-check the
// index against U_CHAR_DIRECTION_COUNT before reading the table.
//
// The table is pure read-only data, so it is declared const to keep it out of
// writable storage and to prevent accidental modification.
// NOTE(review): if the ICU in use defines more than the 19 classes listed
// here, the remaining entries are zero-initialized and would therefore map to
// DIRECTIONALITY_LEFT_TO_RIGHT (0) -- confirm against the ICU version in use.
static const int directionality_map[U_CHAR_DIRECTION_COUNT] = {
    0, // U_LEFT_TO_RIGHT (0) => DIRECTIONALITY_LEFT_TO_RIGHT (0)
    1, // U_RIGHT_TO_LEFT (1) => DIRECTIONALITY_RIGHT_TO_LEFT (1)
    3, // U_EUROPEAN_NUMBER (2) => DIRECTIONALITY_EUROPEAN_NUMBER (3)
    4, // U_EUROPEAN_NUMBER_SEPARATOR (3) => DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR (4)
    5, // U_EUROPEAN_NUMBER_TERMINATOR (4) => DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR (5)
    6, // U_ARABIC_NUMBER (5) => DIRECTIONALITY_ARABIC_NUMBER (6)
    7, // U_COMMON_NUMBER_SEPARATOR (6) => DIRECTIONALITY_COMMON_NUMBER_SEPARATOR (7)
    10, // U_BLOCK_SEPARATOR (7) => DIRECTIONALITY_PARAGRAPH_SEPARATOR (10)
    11, // U_SEGMENT_SEPARATOR (8) => DIRECTIONALITY_SEGMENT_SEPARATOR (11)
    12, // U_WHITE_SPACE_NEUTRAL (9) => DIRECTIONALITY_WHITESPACE (12)
    13, // U_OTHER_NEUTRAL (10) => DIRECTIONALITY_OTHER_NEUTRALS (13)
    14, // U_LEFT_TO_RIGHT_EMBEDDING (11) => DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING (14)
    15, // U_LEFT_TO_RIGHT_OVERRIDE (12) => DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE (15)
    2, // U_RIGHT_TO_LEFT_ARABIC (13) => DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (2)
    16, // U_RIGHT_TO_LEFT_EMBEDDING (14) => DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING (16)
    17, // U_RIGHT_TO_LEFT_OVERRIDE (15) => DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE (17)
    18, // U_POP_DIRECTIONAL_FORMAT (16) => DIRECTIONALITY_POP_DIRECTIONAL_FORMAT (18)
    8, // U_DIR_NON_SPACING_MARK (17) => DIRECTIONALITY_NONSPACING_MARK (8)
    9, // U_BOUNDARY_NEUTRAL (18) => DIRECTIONALITY_BOUNDARY_NEUTRAL (9)
};

namespace android {
    
@@ -53,15 +77,21 @@ static void getDirectionalities(JNIEnv* env, jobject obj, jcharArray srcArray, j
            src[i + 1] >= 0xDC00 && src[i + 1] <= 0xDFFF) {
            int c = 0x00010000 + ((src[i] - 0xD800) << 10) +
                                 (src[i + 1] & 0x3FF);
            int dir = android::Unicode::getDirectionality(c);
            int dir = u_charDirection(c);
            if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT)
                dir = DIRECTIONALITY_UNDEFINED;
            else
                dir = directionality_map[dir];

            dest[i++] = dir;
            dest[i] = dir;
        } else {
            int c = src[i];
            int dir = android::Unicode::getDirectionality(c);

            dest[i] = dir;
            int dir = u_charDirection(c);
            if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT)
                dest[i] = DIRECTIONALITY_UNDEFINED;
            else
                dest[i] = directionality_map[dir];
        }
    }
    
@@ -89,7 +119,7 @@ static jboolean mirror(JNIEnv* env, jobject obj, jcharArray charArray, int start
        // XXX this thinks it knows that surrogates are never mirrored

        int c1 = data[i];
        int c2 = android::Unicode::toMirror(c1);
        int c2 = u_charMirror(c1);

        if (c1 != c2) {
            data[i] = c2;
@@ -104,7 +134,7 @@ MIRROR_END:

// Native helper with a JNI-style signature: returns the mirrored counterpart
// of the UTF-16 code unit `c`. `env` and `obj` are not used by the body.
//
// NOTE(review): this listing carries two return statements because the commit
// view shows both the pre-change line (android::Unicode::toMirror) and the
// post-change line (u_charMirror) without diff markers. As written, only the
// first return can execute -- confirm which line belongs to the final file.
static jchar getMirror(JNIEnv* env, jobject obj, jchar c)
{   
    return android::Unicode::toMirror(c);
    return u_charMirror(c);
}

static JNINativeMethod gMethods[] = {

include/utils/AndroidUnicode.h

deleted100644 → 0
+0 −128
Original line number Diff line number Diff line
/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//

#ifndef ANDROID_UNICODE_H
#define ANDROID_UNICODE_H

#include <stdint.h>
#include <sys/types.h>

#define REPLACEMENT_CHAR (0xFFFD)

// this part of code is copied from umachine.h under ICU
/**
 * Define UChar32 as a type for single Unicode code points.
 * UChar32 is a signed 32-bit integer (same as int32_t).
 *
 * The Unicode code point range is 0..0x10ffff.
 * All other values (negative or >=0x110000) are illegal as Unicode code points.
 * They may be used as sentinel values to indicate "done", "error"
 * or similar non-code point conditions.
 *
 * @stable ICU 2.4
 */
typedef int32_t UChar32;

namespace android {

    class Encoding;

    /**
     * \class Unicode
     *
     * Collection of static lookups for per-character Unicode properties:
     * the raw packed property word, the bidirectional category
     * (see Direction), and mirroring information.
     */
    class Unicode
    {
    public:
        /**
         * Bidirectional categories from the Unicode standard. The numeric
         * values map directly to java.lang.Character, so they are spelled
         * out explicitly here.
         */
        enum Direction {
            DIRECTIONALITY_UNDEFINED                  = -1,
            DIRECTIONALITY_LEFT_TO_RIGHT              = 0,
            DIRECTIONALITY_RIGHT_TO_LEFT              = 1,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       = 2,
            DIRECTIONALITY_EUROPEAN_NUMBER            = 3,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  = 4,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5,
            DIRECTIONALITY_ARABIC_NUMBER              = 6,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    = 7,
            DIRECTIONALITY_NONSPACING_MARK            = 8,
            DIRECTIONALITY_BOUNDARY_NEUTRAL           = 9,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10,
            DIRECTIONALITY_SEGMENT_SEPARATOR          = 11,
            DIRECTIONALITY_WHITESPACE                 = 12,
            DIRECTIONALITY_OTHER_NEUTRALS             = 13,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18
        };

        /**
         * Fetches the packed 32-bit property word used by the Java-side
         * calls.
         *
         * The layout is copied from the java.lang.Character implementation.
         * Fields, from the least significant bit upwards:
         *
         *   bits  0..4   type                  (31 types)
         *   bits  5..9   directionality        (18 directionalities)
         *   bit   10     mirrored flag         (2 mirroreds)
         *   bits 11..16  toupper diff index    (56 toupper diffs)
         *   bits 17..22  tolower diff index    (48 tolower diffs)
         *   bits 23..24  totitlecase diff      (4 totitlecase diffs)
         *   bits 25..31  numeric value         (84 numeric values)
         *   bits 25..29  mirror char diff      (24 mirror char diffs)
         *
         * The numeric-value and mirror-diff fields share the same bits.
         *
         * @param c The unicode character.
         * @return The packed data for the character.
         */
        static uint32_t getPackedData(UChar32 c);

        /**
         * Looks up the bidirectional category of a character.
         * @param c The unicode character.
         * @return The direction of the character or DIRECTIONALITY_UNDEFINED.
         */
        static Direction getDirectionality(UChar32 c);

        /**
         * Tells whether a character is mirrored, i.e. whether another
         * character exists that is its mirror image.
         * @param c The unicode character.
         * @return True iff c has a mirror equivalent.
         */
        static bool isMirrored(UChar32 c);

        /**
         * Maps a character to its mirror image.
         * @param c The unicode character.
         * @return The mirror equivalent of c, or c itself when it has no
         *         mirror equivalent.
         * @see isMirrored
         */
        static UChar32 toMirror(UChar32 c);
   };

}

#endif
+0 −1
Original line number Diff line number Diff line
@@ -75,7 +75,6 @@ include $(CLEAR_VARS)
# we have the common sources, plus some device-specific stuff
LOCAL_SRC_FILES:= \
	$(commonSources) \
	Unicode.cpp \
    BackupData.cpp \
	BackupHelpers.cpp

libs/utils/CharacterData.h

deleted100644 → 0
+0 −689

File deleted.

Preview size limit exceeded, changes collapsed.

libs/utils/Unicode.cpp

deleted100644 → 0
+0 −132
Original line number Diff line number Diff line
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <utils/AndroidUnicode.h>
#include "CharacterData.h"

#define LOG_TAG "Unicode"
#include <utils/Log.h>

// ICU headers for using macros
#include <unicode/utf16.h>

// Radix bounds. Not referenced by any code in this file.
#define MIN_RADIX 2
#define MAX_RADIX 36

// The macros below describe bit fields inside the 32-bit word returned by
// android::Unicode::getPackedData(). Each field is extracted as
// (data >> X_SHIFT) & X_MASK.

// Character type: 5 bits starting at bit 0.
#define TYPE_SHIFT 0
#define TYPE_MASK ((1<<5)-1)

// Directionality: 5 bits starting at bit 5.
#define DIRECTION_SHIFT (TYPE_SHIFT+5)
#define DIRECTION_MASK ((1<<5)-1)

// Mirrored flag: 1 bit at bit 10.
#define MIRRORED_SHIFT (DIRECTION_SHIFT+5)
#define MIRRORED_MASK ((1<<1)-1)

// To-upper field: 6 bits starting at bit 11.
#define TOUPPER_SHIFT (MIRRORED_SHIFT+1)
#define TOUPPER_MASK ((1<<6)-1)

// To-lower field: 6 bits starting at bit 17.
#define TOLOWER_SHIFT (TOUPPER_SHIFT+6)
#define TOLOWER_MASK ((1<<6)-1)

// To-title field: 2 bits starting at bit 23.
#define TOTITLE_SHIFT (TOLOWER_SHIFT+6)
#define TOTITLE_MASK ((1<<2)-1)

// Mirror-diff index (into CharacterData::MIRROR_DIFF): 5 bits starting at
// bit 25. Shares its starting bit with the numeric field below.
#define MIRROR_SHIFT (TOTITLE_SHIFT+2)
#define MIRROR_MASK ((1<<5)-1)

// Numeric field: 7 bits starting at bit 25 (overlaps the mirror-diff field).
#define NUMERIC_SHIFT (TOTITLE_SHIFT+2)
#define NUMERIC_MASK ((1<<7)-1)

// Decomposition type: 5 bits starting at bit 11. Unlike the fields above this
// shift matches the top 5 bits of the 16-bit value returned by
// findCharacterValue(), not the packed data word. Not referenced by any code
// in this file.
#define DECOMPOSITION_SHIFT (11)
#define DECOMPOSITION_MASK ((1<<5)-1)

/*
 * Returns the value stored in the CharacterData tables that contains
 * an index into the packed data table and the decomposition type.
 *
 * Code points below 256 are read directly from LATIN1_DATA. For everything
 * else the code point is rotated so that its parity bit becomes bit 20, the
 * top bits of the rotated value select a range in FULL_DATA, and that range
 * is binary-searched for the last entry whose value does not exceed the
 * search key. An empty range yields 0.
 *
 * @param c The unicode character; expected to be in 0..0x10FFFF.
 * @return The low 16 bits of the matching table entry.
 */
static uint16_t findCharacterValue(UChar32 c)
{
    LOG_ASSERT(c >= 0 && c <= 0x10FFFF, "findCharacterValue received an invalid codepoint");
    if (c < 256)
        return CharacterData::LATIN1_DATA[c];

    // All bit manipulation is done on an unsigned copy: the rotated value can
    // occupy 21 bits, so shifting it left by 16 as a signed UChar32 would
    // overflow, which is undefined behaviour for signed integers.
    uint32_t key = static_cast<uint32_t>(c);

    // Rotate the bits because the tables are separated into even and odd codepoints
    key = (key >> 1) | ((key & 1u) << 20);

    CharacterData::Range search = CharacterData::FULL_DATA[key >> 16];
    const uint32_t* array = search.array;

    // This trick is so that the compare in the while loop does not
    // need to shift the array entry down by 16
    key = (key << 16) | 0xFFFFu;

    int high = (int)search.length - 1;
    int low = 0;

    if (high < 0)
        return 0;

    while (low < high - 1)
    {
        int probe = (high + low) >> 1;

        // The entries contain the codepoint in the high 16 bits and the index
        // into PACKED_DATA in the low 16.
        if (array[probe] > key)
            high = probe;
        else
            low = probe;
    }

    LOG_ASSERT((array[low] <= key), "A suitable range was not found");
    return array[low] & 0xFFFF;
}

/*
 * Resolves a code point to its packed property word in
 * CharacterData::PACKED_DATA.
 */
uint32_t android::Unicode::getPackedData(UChar32 c)
{
    // The 16-bit table value carries a decomposition type in its top 5 bits;
    // only the low 11 bits form the PACKED_DATA index.
    const uint16_t tableValue = findCharacterValue(c);
    const uint16_t packedIndex = tableValue & 0x7FF;
    return CharacterData::PACKED_DATA[packedIndex];
}

/*
 * Extracts the directionality field from the packed data of a code point.
 * Yields DIRECTIONALITY_UNDEFINED when the packed data is zero or when the
 * field holds the all-ones value.
 */
android::Unicode::Direction android::Unicode::getDirectionality(UChar32 c)
{
    const uint32_t packed = getPackedData(c);
    if (packed == 0)
        return DIRECTIONALITY_UNDEFINED;

    const uint32_t field = (packed >> DIRECTION_SHIFT) & DIRECTION_MASK;

    // A field with every bit set carries no direction.
    return (field == DIRECTION_MASK) ? DIRECTIONALITY_UNDEFINED
                                     : static_cast<Direction>(field);
}

/*
 * Reports whether the mirrored flag is set in the packed data of a code
 * point.
 */
bool android::Unicode::isMirrored(UChar32 c)
{
    const uint32_t mirroredFlag = (getPackedData(c) >> MIRRORED_SHIFT) & MIRRORED_MASK;
    return mirroredFlag != 0;
}

/*
 * Maps a code point to its mirror image. Code points without the mirrored
 * flag are returned unchanged; otherwise the offset selected by the
 * mirror-diff field of the packed data is added to the code point.
 */
UChar32 android::Unicode::toMirror(UChar32 c)
{
    if (isMirrored(c)) {
        const uint32_t diffIndex = (getPackedData(c) >> MIRROR_SHIFT) & MIRROR_MASK;
        return c + CharacterData::MIRROR_DIFF[diffIndex];
    }

    return c;
}