Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 47360101 authored by Roozbeh Pournader
Browse files

Fix various conformance issues with TextDirectionHeuristics.

1. The algorithms no longer consider directional embeddings and
overrides as strong characters.

2. The algorithms are updated to Unicode 7.0, and consider the
directional isolates in determining the direction of input.

3. Codepoints are used instead of code units for looking up bidi
properties.

4. The bidi direction of unassigned characters is considered.

Bug: 20142480
Change-Id: I9be161c112b8f23565ed8961bb7d44ced234f67a
parent 11145938
Loading
Loading
Loading
Loading
+80 −40
Original line number Original line Diff line number Diff line
@@ -81,29 +81,47 @@ public class TextDirectionHeuristics {
    private static final int STATE_FALSE = 1;
    private static final int STATE_FALSE = 1;
    private static final int STATE_UNKNOWN = 2;
    private static final int STATE_UNKNOWN = 2;


    private static int isRtlText(int directionality) {
    /* Returns STATE_TRUE for strong RTL characters, STATE_FALSE for strong LTR characters, and
        switch (directionality) {
     * STATE_UNKNOWN for everything else.
     */
    private static int isRtlCodePoint(int codePoint) {
        switch (Character.getDirectionality(codePoint)) {
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
                return STATE_FALSE;
                return STATE_FALSE;
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
                return STATE_TRUE;
                return STATE_TRUE;
            default:
            case Character.DIRECTIONALITY_UNDEFINED:
                // Unassigned characters still have bidi direction, defined at:
                // http://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt

                if ((0x0590 <= codePoint && codePoint <= 0x08FF) ||
                        (0xFB1D <= codePoint && codePoint <= 0xFDCF) ||
                        (0xFDF0 <= codePoint && codePoint <= 0xFDFF) ||
                        (0xFE70 <= codePoint && codePoint <= 0xFEFF) ||
                        (0x10800 <= codePoint && codePoint <= 0x10FFF) ||
                        (0x1E800 <= codePoint && codePoint <= 0x1EFFF)) {
                    // Unassigned RTL character
                    return STATE_TRUE;
                } else if (
                        // Potentially-unassigned Default_Ignorable. Ranges are from unassigned
                        // characters that have Unicode property Other_Default_Ignorable_Code_Point
                        // plus some widening to cover bidi isolates and simplify checks.
                        (0x2065 <= codePoint && codePoint <= 0x2069) ||
                        (0xFFF0 <= codePoint && codePoint <= 0xFFF8) ||
                        (0xE0000 <= codePoint && codePoint <= 0xE0FFF) ||
                        // Non-character
                        (0xFDD0 <= codePoint && codePoint <= 0xFDEF) ||
                        ((codePoint & 0xFFFE) == 0xFFFE) ||
                        // Currency symbol
                        (0x20A0 <= codePoint && codePoint <= 0x20CF) ||
                        // Unpaired surrogate
                        (0xD800 <= codePoint && codePoint <= 0xDFFF)) {
                    return STATE_UNKNOWN;
                    return STATE_UNKNOWN;
        }
                } else {
    }
                    // Unassigned LTR character

    private static int isRtlTextOrFormat(int directionality) {
        switch (directionality) {
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
                    return STATE_FALSE;
                    return STATE_FALSE;
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
                }
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
                return STATE_TRUE;
            default:
            default:
                return STATE_UNKNOWN;
                return STATE_UNKNOWN;
        }
        }
@@ -181,14 +199,26 @@ public class TextDirectionHeuristics {


    /**
    /**
     * Algorithm that uses the first strong directional character to determine the paragraph
     * Algorithm that uses the first strong directional character to determine the paragraph
     * direction. This is the standard Unicode Bidirectional algorithm.
     * direction. This is the standard Unicode Bidirectional Algorithm (steps P2 and P3), with the
     * exception that if no strong character is found, UNKNOWN is returned.
     */
     */
    private static class FirstStrong implements TextDirectionAlgorithm {
    private static class FirstStrong implements TextDirectionAlgorithm {
        @Override
        @Override
        public int checkRtl(CharSequence cs, int start, int count) {
        public int checkRtl(CharSequence cs, int start, int count) {
            int result = STATE_UNKNOWN;
            int result = STATE_UNKNOWN;
            for (int i = start, e = start + count; i < e && result == STATE_UNKNOWN; ++i) {
            int openIsolateCount = 0;
                result = isRtlTextOrFormat(Character.getDirectionality(cs.charAt(i)));
            for (int cp, i = start, end = start + count;
                    i < end && result == STATE_UNKNOWN;
                    i += Character.charCount(cp)) {
                cp = Character.codePointAt(cs, i);
                if (0x2066 <= cp && cp <= 0x2068) { // Opening isolates
                    openIsolateCount += 1;
                } else if (cp == 0x2069) { // POP DIRECTIONAL ISOLATE (PDI)
                    if (openIsolateCount > 0) openIsolateCount -= 1;
                } else if (openIsolateCount == 0) {
                    // Only consider the characters outside isolate pairs
                    result = isRtlCodePoint(cp);
                }
            }
            }
            return result;
            return result;
        }
        }
@@ -200,9 +230,10 @@ public class TextDirectionHeuristics {
    }
    }


    /**
    /**
     * Algorithm that uses the presence of any strong directional non-format
     * Algorithm that uses the presence of any strong directional character of the type indicated
     * character (e.g. excludes LRE, LRO, RLE, RLO) to determine the
     * in the constructor parameter to determine the direction of text.
     * direction of text.
     *
     * Characters inside isolate pairs are skipped.
     */
     */
    private static class AnyStrong implements TextDirectionAlgorithm {
    private static class AnyStrong implements TextDirectionAlgorithm {
        private final boolean mLookForRtl;
        private final boolean mLookForRtl;
@@ -210,8 +241,16 @@ public class TextDirectionHeuristics {
        @Override
        @Override
        public int checkRtl(CharSequence cs, int start, int count) {
        public int checkRtl(CharSequence cs, int start, int count) {
            boolean haveUnlookedFor = false;
            boolean haveUnlookedFor = false;
            for (int i = start, e = start + count; i < e; ++i) {
            int openIsolateCount = 0;
                switch (isRtlText(Character.getDirectionality(cs.charAt(i)))) {
            for (int cp, i = start, end = start + count; i < end; i += Character.charCount(cp)) {
                cp = Character.codePointAt(cs, i);
                if (0x2066 <= cp && cp <= 0x2068) { // Opening isolates
                    openIsolateCount += 1;
                } else if (cp == 0x2069) { // POP DIRECTIONAL ISOLATE (PDI)
                    if (openIsolateCount > 0) openIsolateCount -= 1;
                } else if (openIsolateCount == 0) {
                    // Only consider the characters outside isolate pairs
                    switch (isRtlCodePoint(cp)) {
                        case STATE_TRUE:
                        case STATE_TRUE:
                            if (mLookForRtl) {
                            if (mLookForRtl) {
                                return STATE_TRUE;
                                return STATE_TRUE;
@@ -228,6 +267,7 @@ public class TextDirectionHeuristics {
                            break;
                            break;
                    }
                    }
                }
                }
            }
            if (haveUnlookedFor) {
            if (haveUnlookedFor) {
                return mLookForRtl ? STATE_FALSE : STATE_TRUE;
                return mLookForRtl ? STATE_FALSE : STATE_TRUE;
            }
            }