Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7007a926 authored by Dianne Hackborn's avatar Dianne Hackborn Committed by Android (Google) Code Review
Browse files

Merge "Totally remove Unicode.cpp and rely on ICU"

parents c09a2ff5 0d88e7a8
Loading
Loading
Loading
Loading

include/utils/AndroidUnicode.h

deleted100644 → 0
+0 −128
Original line number Diff line number Diff line
/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//

#ifndef ANDROID_UNICODE_H
#define ANDROID_UNICODE_H

#include <stdint.h>
#include <sys/types.h>

#define REPLACEMENT_CHAR (0xFFFD)

// this part of code is copied from umachine.h under ICU
/**
 * Define UChar32 as a type for single Unicode code points.
 * UChar32 is a signed 32-bit integer (same as int32_t).
 *
 * The Unicode code point range is 0..0x10ffff.
 * All other values (negative or >=0x110000) are illegal as Unicode code points.
 * They may be used as sentinel values to indicate "done", "error"
 * or similar non-code point conditions.
 *
 * @stable ICU 2.4
 */
typedef int32_t UChar32;

namespace android {

    class Encoding;

    /**
     * \class Unicode
     *
     * Collection of static queries about Unicode code points. Every member
     * is static, so the class is used purely as a scope and is never
     * instantiated by the functions declared here.
     *
     * The properties exposed are the bidirectional category of a character
     * (see Direction) and its mirroring behaviour.
     */
    class Unicode
    {
    public:
        /**
         * Bidirectional categories defined by the Unicode standard.
         *
         * The numeric values map directly to the directionality constants
         * of java.lang.Character, so they are spelled out explicitly and
         * must not be renumbered.
         */
        enum Direction {
            DIRECTIONALITY_UNDEFINED                  = -1,
            DIRECTIONALITY_LEFT_TO_RIGHT              = 0,
            DIRECTIONALITY_RIGHT_TO_LEFT              = 1,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       = 2,
            DIRECTIONALITY_EUROPEAN_NUMBER            = 3,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  = 4,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5,
            DIRECTIONALITY_ARABIC_NUMBER              = 6,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    = 7,
            DIRECTIONALITY_NONSPACING_MARK            = 8,
            DIRECTIONALITY_BOUNDARY_NEUTRAL           = 9,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10,
            DIRECTIONALITY_SEGMENT_SEPARATOR          = 11,
            DIRECTIONALITY_WHITESPACE                 = 12,
            DIRECTIONALITY_OTHER_NEUTRALS             = 13,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18
        };

        /**
         * Fetch the packed 32-bit property word for a character, in the
         * form consumed by the Java side.
         *
         * @param c The unicode character.
         * @return The packed data for the character.
         *
         * Bit layout, copied from the java.lang.Character implementation:
         * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
         * F E D C B A 9 8 7 6 5 4 3 2 1 0 F E D C B A 9 8 7 6 5 4 3 2 1 0
         *
         *                              31 types                 ---------
         *                   18 directionalities       ---------
         *                   2 mirroreds             -
         *                               -----------      56  toupper diffs
         *                   -----------                  48  tolower diffs
         *               ---                              4 totitlecase diffs
         * -------------                                 84 numeric values
         *     ---------                                 24 mirror char diffs
         */
        static uint32_t getPackedData(UChar32 c);

        /**
         * Look up the bidirectional category of a character.
         *
         * @param c The unicode character.
         * @return The character's Direction, or DIRECTIONALITY_UNDEFINED
         *         when no direction is recorded for it.
         */
        static Direction getDirectionality(UChar32 c);

        /**
         * Tell whether a character is mirrored, i.e. whether there is an
         * equivalent character that is its mirror image.
         *
         * @param c The unicode character.
         * @return True iff c has a mirror equivalent.
         */
        static bool isMirrored(UChar32 c);

        /**
         * Map a character to its mirror image.
         *
         * @param c The unicode character.
         * @return The mirror equivalent of c, or c itself when c has no
         *         mirror equivalent.
         * @see isMirrored
         */
        static UChar32 toMirror(UChar32 c);
    };

}

#endif
+0 −1
Original line number Diff line number Diff line
@@ -75,7 +75,6 @@ include $(CLEAR_VARS)
# we have the common sources, plus some device-specific stuff
LOCAL_SRC_FILES:= \
	$(commonSources) \
	Unicode.cpp \
    BackupData.cpp \
	BackupHelpers.cpp

libs/utils/CharacterData.h

deleted100644 → 0
+0 −689

File deleted.

Preview size limit exceeded, changes collapsed.

libs/utils/Unicode.cpp

deleted100644 → 0
+0 −132
Original line number Diff line number Diff line
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <utils/AndroidUnicode.h>
#include "CharacterData.h"

#define LOG_TAG "Unicode"
#include <utils/Log.h>

// ICU headers for using macros
#include <unicode/utf16.h>

// Radix bounds. Not referenced by any code in the portion of this file
// visible here.
#define MIN_RADIX 2
#define MAX_RADIX 36

// Field layout of the 32-bit packed property word returned by
// getPackedData(). For each field, *_SHIFT is the bit offset of the field's
// least significant bit and *_MASK is applied after shifting right by that
// offset, i.e. value = (packed >> X_SHIFT) & X_MASK.

// Character type: bits 0-4.
#define TYPE_SHIFT 0
#define TYPE_MASK ((1<<5)-1)

// Directionality: bits 5-9. getDirectionality() treats the all-ones value
// as "undefined".
#define DIRECTION_SHIFT (TYPE_SHIFT+5)
#define DIRECTION_MASK ((1<<5)-1)

// Mirrored flag: bit 10.
#define MIRRORED_SHIFT (DIRECTION_SHIFT+5)
#define MIRRORED_MASK ((1<<1)-1)

// To-upper-case diff index: bits 11-16.
#define TOUPPER_SHIFT (MIRRORED_SHIFT+1)
#define TOUPPER_MASK ((1<<6)-1)

// To-lower-case diff index: bits 17-22.
#define TOLOWER_SHIFT (TOUPPER_SHIFT+6)
#define TOLOWER_MASK ((1<<6)-1)

// To-title-case diff index: bits 23-24.
#define TOTITLE_SHIFT (TOLOWER_SHIFT+6)
#define TOTITLE_MASK ((1<<2)-1)

// Index into the mirror-diff table: bits 25-29.
// NOTE: this field starts at the same bit as the numeric field below, so
// the two overlap; presumably a given character uses only one of them --
// confirm against the table generator.
#define MIRROR_SHIFT (TOTITLE_SHIFT+2)
#define MIRROR_MASK ((1<<5)-1)

// Numeric value index: bits 25-31 (overlaps the mirror field above).
#define NUMERIC_SHIFT (TOTITLE_SHIFT+2)
#define NUMERIC_MASK ((1<<7)-1)

// Decomposition type: unlike the fields above, this one applies to the
// 16-bit value returned by findCharacterValue(), whose top 5 bits
// (bits 11-15) hold the decomposition type.
#define DECOMPOSITION_SHIFT (11)
#define DECOMPOSITION_MASK ((1<<5)-1)

/*
 * Returns the value stored in the CharacterData tables that contains
 * an index into the packed data table and the decomposition type.
 *
 * Code points below 256 are read directly from LATIN1_DATA. All others are
 * located by a binary search in one of the FULL_DATA ranges.
 *
 * @param c The code point to look up. Valid values are 0..0x10FFFF.
 * @return The 16-bit table value for c. 0 is returned when c is not a valid
 *         code point or when the range selected for c is empty.
 */
static uint16_t findCharacterValue(UChar32 c)
{
    LOG_ASSERT(c >= 0 && c <= 0x10FFFF, "findCharacterValue received an invalid codepoint");

    // The assertion above may be compiled out of non-debug builds. Reject
    // invalid code points explicitly so that a negative value can never be
    // used as an index into LATIN1_DATA and an oversized one can never index
    // past FULL_DATA. 0 is the same value the empty-range case below returns.
    if (c < 0 || c > 0x10FFFF)
        return 0;

    if (c < 256)
        return CharacterData::LATIN1_DATA[c];

    // All bit manipulation below is done on an unsigned value: the 16-bit
    // left shift intentionally discards high bits, which would be a signed
    // overflow if performed on UChar32 (int32_t).
    const uint32_t codepoint = static_cast<uint32_t>(c);

    // Rotate the bits because the tables are separated into even and odd codepoints
    const uint32_t rotated = (codepoint >> 1) | ((codepoint & 1u) << 20);

    CharacterData::Range search = CharacterData::FULL_DATA[rotated >> 16];
    const uint32_t* array = search.array;

    // This trick is so that the compare in the while loop does not
    // need to shift the array entry down by 16
    const uint32_t key = (rotated << 16) | 0xFFFFu;

    int high = (int)search.length - 1;
    int low = 0;

    if (high < 0)
        return 0;

    while (low < high - 1)
    {
        int probe = (high + low) >> 1;

        // The entries contain the codepoint in the high 16 bits and the index
        // into PACKED_DATA in the low 16.
        if (array[probe] > key)
            high = probe;
        else
            low = probe;
    }

    LOG_ASSERT((array[low] <= key), "A suitable range was not found");
    return array[low] & 0xFFFF;
}

uint32_t android::Unicode::getPackedData(UChar32 c)
{
    // The 16-bit table value carries a decomposition type in its top 5 bits;
    // only the low 11 bits form the index into PACKED_DATA.
    const uint16_t tableValue = findCharacterValue(c);
    const int packedIndex = tableValue & 0x7FF;

    return CharacterData::PACKED_DATA[packedIndex];
}

android::Unicode::Direction android::Unicode::getDirectionality(UChar32 c)
{
    const uint32_t packed = getPackedData(c);

    // An all-zero packed word carries no direction information.
    if (packed == 0)
        return DIRECTIONALITY_UNDEFINED;

    // Pull out the raw direction field before converting it to the enum.
    const uint32_t rawDirection = (packed >> DIRECTION_SHIFT) & DIRECTION_MASK;

    // A field with every bit set also means "undefined".
    return (rawDirection == DIRECTION_MASK)
            ? DIRECTIONALITY_UNDEFINED
            : static_cast<Direction>(rawDirection);
}

bool android::Unicode::isMirrored(UChar32 c)
{
    // Isolate the mirrored flag from the packed property word.
    const uint32_t packed = getPackedData(c);
    const uint32_t mirroredFlag = (packed >> MIRRORED_SHIFT) & MIRRORED_MASK;

    return mirroredFlag != 0;
}

UChar32 android::Unicode::toMirror(UChar32 c)
{
    if (isMirrored(c)) {
        // The packed word stores an index into MIRROR_DIFF; the entry found
        // there is added to c to obtain the mirrored character.
        const uint32_t packed = getPackedData(c);
        const uint32_t diffIndex = (packed >> MIRROR_SHIFT) & MIRROR_MASK;
        return c + CharacterData::MIRROR_DIFF[diffIndex];
    }

    // Characters without a mirror equivalent are returned unchanged.
    return c;
}