Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca132ca6 authored by TreeHugger Robot's avatar TreeHugger Robot Committed by Android (Google) Code Review
Browse files

Merge "Refactor WordIterator to avoid copying to internal buffers"

parents 8ff3c926 b08a0716
Loading
Loading
Loading
Loading
+108 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text;

import android.annotation.NonNull;

import java.text.CharacterIterator;

/**
 * An implementation of {@link java.text.CharacterIterator} that iterates over a given CharSequence.
 *
 * <p>The iterator covers the half-open range {@code [start, end)} of the sequence and reports
 * positions as indexes into the original sequence. No copy of the text is made; characters are
 * read from the given sequence on demand through {@link CharSequence#charAt(int)}.
 * {@hide}
 */
public class CharSequenceCharacterIterator implements CharacterIterator {
    private final int mBeginIndex, mEndIndex;
    private int mIndex;
    private final CharSequence mCharSeq;

    /**
     * Constructs the iterator given a CharSequence and a range. The position of the iterator index
     * is set to the beginning of the range.
     *
     * <p>The range is not validated here; callers are expected to pass
     * {@code 0 <= start <= end <= text.length()}.
     *
     * @param text the sequence to iterate over.
     * @param start index of the first character of the range (inclusive).
     * @param end index just past the last character of the range (exclusive).
     */
    public CharSequenceCharacterIterator(@NonNull CharSequence text, int start, int end) {
        mCharSeq = text;
        mBeginIndex = mIndex = start;
        mEndIndex = end;
    }

    /**
     * Moves the index to the beginning of the range.
     *
     * @return the character at the beginning of the range, or {@link #DONE} if the range is empty.
     */
    @Override
    public char first() {
        mIndex = mBeginIndex;
        return current();
    }

    /**
     * Moves the index to the last character of the range, or to the end index if the range is
     * empty.
     *
     * @return the last character of the range, or {@link #DONE} if the range is empty.
     */
    @Override
    public char last() {
        if (mBeginIndex == mEndIndex) {
            mIndex = mEndIndex;
            return DONE;
        } else {
            mIndex = mEndIndex - 1;
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Returns the character at the current index without moving it.
     *
     * @return the current character, or {@link #DONE} if the index is at the end of the range.
     */
    @Override
    public char current() {
        return (mIndex == mEndIndex) ? DONE : mCharSeq.charAt(mIndex);
    }

    /**
     * Advances the index by one. The index never moves past the end index.
     *
     * @return the character at the new index, or {@link #DONE} if the end of the range was
     *     reached.
     */
    @Override
    public char next() {
        mIndex++;
        if (mIndex >= mEndIndex) {
            // Clamp so that repeated calls at the end keep the index at the end index.
            mIndex = mEndIndex;
            return DONE;
        } else {
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Moves the index back by one, unless it is already at the beginning of the range.
     *
     * @return the character at the new index, or {@link #DONE} if the index was already at the
     *     beginning of the range (in which case it is left unchanged).
     */
    @Override
    public char previous() {
        if (mIndex <= mBeginIndex) {
            return DONE;
        } else {
            mIndex--;
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Moves the index to the given position.
     *
     * @param position the new index; must be within {@code [getBeginIndex(), getEndIndex()]}.
     * @return the character at the new index, or {@link #DONE} if it equals the end index.
     * @throws IllegalArgumentException if {@code position} is outside the range.
     */
    @Override
    public char setIndex(int position) {
        if (mBeginIndex <= position && position <= mEndIndex) {
            mIndex = position;
            return current();
        } else {
            throw new IllegalArgumentException("invalid position");
        }
    }

    /** Returns the start (inclusive) of the range given at construction. */
    @Override
    public int getBeginIndex() {
        return mBeginIndex;
    }

    /** Returns the end (exclusive) of the range given at construction. */
    @Override
    public int getEndIndex() {
        return mEndIndex;
    }

    /** Returns the current index. */
    @Override
    public int getIndex() {
        return mIndex;
    }

    /**
     * Creates a copy of this iterator with the same range and current index. The copy refers to
     * the same underlying CharSequence; the text itself is not duplicated.
     */
    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Not expected: CharacterIterator extends Cloneable. Keep the cause so that the
            // failure can be diagnosed if it ever does happen.
            final InternalError error = new InternalError();
            error.initCause(e);
            throw error;
        }
    }
}
+66 −92
Original line number Diff line number Diff line

/*
 * Copyright (C) 2011 The Android Open Source Project
 *
@@ -17,10 +16,11 @@

package android.text.method;

import android.annotation.NonNull;
import android.icu.text.BreakIterator;
import android.text.Selection;
import android.text.SpannableStringBuilder;
import android.text.CharSequenceCharacterIterator;

import android.icu.text.BreakIterator;
import java.util.Locale;

/**
@@ -35,10 +35,9 @@ public class WordIterator implements Selection.PositionIterator {
    // Size of the window for the word iterator, should be greater than the longest word's length
    private static final int WINDOW_WIDTH = 50;

    private String mString;
    private int mOffsetShift;

    private BreakIterator mIterator;
    private int mStart, mEnd;
    private CharSequence mCharSeq;
    private final BreakIterator mIterator;

    /**
     * Constructs a WordIterator using the default locale.
@@ -49,59 +48,49 @@ public class WordIterator implements Selection.PositionIterator {

    /**
     * Constructs a new WordIterator for the specified locale.
     * @param locale The locale to be used when analysing the text.
     * @param locale The locale to be used for analyzing the text.
     */
    public WordIterator(Locale locale) {
        mIterator = BreakIterator.getWordInstance(locale);
    }

    public void setCharSequence(CharSequence charSequence, int start, int end) {
        mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
        final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);

        if (charSequence instanceof SpannableStringBuilder) {
            mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
    public void setCharSequence(@NonNull CharSequence charSequence, int start, int end) {
        if (0 <= start && end <= charSequence.length()) {
            mCharSeq = charSequence;
            mStart = Math.max(0, start - WINDOW_WIDTH);
            mEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
            mIterator.setText(new CharSequenceCharacterIterator(charSequence, mStart, mEnd));
        } else {
            mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
            throw new IndexOutOfBoundsException("input indexes are outside the CharSequence");
        }
        mIterator.setText(mString);
    }

    /** {@inheritDoc} */
    public int preceding(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        do {
            shiftedOffset = mIterator.preceding(shiftedOffset);
            if (shiftedOffset == BreakIterator.DONE) {
                return BreakIterator.DONE;
        checkOffsetIsValid(offset);
        while (true) {
            offset = mIterator.preceding(offset);
            if (offset == BreakIterator.DONE || isOnLetterOrDigit(offset)) {
                return offset;
            }
            if (isOnLetterOrDigit(shiftedOffset)) {
                return shiftedOffset + mOffsetShift;
        }
        } while (true);
    }

    /** {@inheritDoc} */
    public int following(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        do {
            shiftedOffset = mIterator.following(shiftedOffset);
            if (shiftedOffset == BreakIterator.DONE) {
                return BreakIterator.DONE;
        checkOffsetIsValid(offset);
        while (true) {
            offset = mIterator.following(offset);
            if (offset == BreakIterator.DONE || isAfterLetterOrDigit(offset)) {
                return offset;
            }
            if (isAfterLetterOrDigit(shiftedOffset)) {
                return shiftedOffset + mOffsetShift;
        }
        } while (true);
    }

    /** {@inheritDoc} */
    public boolean isBoundary(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        return mIterator.isBoundary(shiftedOffset);
        checkOffsetIsValid(offset);
        return mIterator.isBoundary(offset);
    }

    /**
@@ -112,13 +101,8 @@ public class WordIterator implements Selection.PositionIterator {
     * @return the position of the first boundary following the given offset.
     */
    public int nextBoundary(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        shiftedOffset = mIterator.following(shiftedOffset);
        if (shiftedOffset == BreakIterator.DONE) {
            return BreakIterator.DONE;
        }
        return shiftedOffset + mOffsetShift;
        checkOffsetIsValid(offset);
        return mIterator.following(offset);
    }

    /**
@@ -129,13 +113,8 @@ public class WordIterator implements Selection.PositionIterator {
     * @return the position of the last boundary preceding the given offset.
     */
    public int prevBoundary(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        shiftedOffset = mIterator.preceding(shiftedOffset);
        if (shiftedOffset == BreakIterator.DONE) {
            return BreakIterator.DONE;
        }
        return shiftedOffset + mOffsetShift;
        checkOffsetIsValid(offset);
        return mIterator.preceding(offset);
    }

    /** If <code>offset</code> is within a word, returns the index of the first character of that
@@ -228,20 +207,19 @@ public class WordIterator implements Selection.PositionIterator {
     * @throws IllegalArgumentException if offset is not valid.
     */
    private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
        final int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        checkOffsetIsValid(offset);

        if (isOnLetterOrDigit(shiftedOffset)) {
            if (mIterator.isBoundary(shiftedOffset)
                    && (!isAfterLetterOrDigit(shiftedOffset)
        if (isOnLetterOrDigit(offset)) {
            if (mIterator.isBoundary(offset)
                    && (!isAfterLetterOrDigit(offset)
                            || !getPrevWordBeginningOnTwoWordsBoundary)) {
                return shiftedOffset + mOffsetShift;
                return offset;
            } else {
                return mIterator.preceding(shiftedOffset) + mOffsetShift;
                return mIterator.preceding(offset);
            }
        } else {
            if (isAfterLetterOrDigit(shiftedOffset)) {
                return mIterator.preceding(shiftedOffset) + mOffsetShift;
            if (isAfterLetterOrDigit(offset)) {
                return mIterator.preceding(offset);
            }
        }
        return BreakIterator.DONE;
@@ -264,19 +242,18 @@ public class WordIterator implements Selection.PositionIterator {
     * @throws IllegalArgumentException if offset is not valid.
     */
    private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
        final int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        checkOffsetIsValid(offset);

        if (isAfterLetterOrDigit(shiftedOffset)) {
            if (mIterator.isBoundary(shiftedOffset)
                    && (!isOnLetterOrDigit(shiftedOffset) || !getNextWordEndOnTwoWordBoundary)) {
                return shiftedOffset + mOffsetShift;
        if (isAfterLetterOrDigit(offset)) {
            if (mIterator.isBoundary(offset)
                    && (!isOnLetterOrDigit(offset) || !getNextWordEndOnTwoWordBoundary)) {
                return offset;
            } else {
                return mIterator.following(shiftedOffset) + mOffsetShift;
                return mIterator.following(offset);
            }
        } else {
            if (isOnLetterOrDigit(shiftedOffset)) {
                return mIterator.following(shiftedOffset) + mOffsetShift;
            if (isOnLetterOrDigit(offset)) {
                return mIterator.following(offset);
            }
        }
        return BreakIterator.DONE;
@@ -290,7 +267,7 @@ public class WordIterator implements Selection.PositionIterator {
     * @param offset the offset to search from.
     */
    public int getPunctuationBeginning(int offset) {
        checkOffsetIsValid(offset - mOffsetShift);
        checkOffsetIsValid(offset);
        while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
            offset = prevBoundary(offset);
        }
@@ -306,7 +283,7 @@ public class WordIterator implements Selection.PositionIterator {
     * @param offset the offset to search from.
     */
    public int getPunctuationEnd(int offset) {
        checkOffsetIsValid(offset - mOffsetShift);
        checkOffsetIsValid(offset);
        while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
            offset = nextBoundary(offset);
        }
@@ -322,9 +299,8 @@ public class WordIterator implements Selection.PositionIterator {
     * @return Whether the offset is after a punctuation character.
     */
    public boolean isAfterPunctuation(int offset) {
        final int shiftedOffset = offset - mOffsetShift;
        if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
            final int codePoint = mString.codePointBefore(shiftedOffset);
        if (mStart < offset && offset <= mEnd) {
            final int codePoint = Character.codePointBefore(mCharSeq, offset);
            return isPunctuation(codePoint);
        }
        return false;
@@ -338,9 +314,8 @@ public class WordIterator implements Selection.PositionIterator {
     * @return Whether the offset is at a punctuation character.
     */
    public boolean isOnPunctuation(int offset) {
        final int shiftedOffset = offset - mOffsetShift;
        if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
            final int codePoint = mString.codePointAt(shiftedOffset);
        if (mStart <= offset && offset < mEnd) {
            final int codePoint = Character.codePointAt(mCharSeq, offset);
            return isPunctuation(codePoint);
        }
        return false;
@@ -354,8 +329,8 @@ public class WordIterator implements Selection.PositionIterator {
        return !isOnPunctuation(offset) && isAfterPunctuation(offset);
    }

    private boolean isPunctuation(int cp) {
        int type = Character.getType(cp);
    private static boolean isPunctuation(int cp) {
        final int type = Character.getType(cp);
        return (type == Character.CONNECTOR_PUNCTUATION ||
                type == Character.DASH_PUNCTUATION ||
                type == Character.END_PUNCTUATION ||
@@ -365,27 +340,26 @@ public class WordIterator implements Selection.PositionIterator {
                type == Character.START_PUNCTUATION);
    }

    private boolean isAfterLetterOrDigit(int shiftedOffset) {
        if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
            final int codePoint = mString.codePointBefore(shiftedOffset);
    private boolean isAfterLetterOrDigit(int offset) {
        if (mStart < offset && offset <= mEnd) {
            final int codePoint = Character.codePointBefore(mCharSeq, offset);
            if (Character.isLetterOrDigit(codePoint)) return true;
        }
        return false;
    }

    private boolean isOnLetterOrDigit(int shiftedOffset) {
        if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
            final int codePoint = mString.codePointAt(shiftedOffset);
    private boolean isOnLetterOrDigit(int offset) {
        if (mStart <= offset && offset < mEnd) {
            final int codePoint = Character.codePointAt(mCharSeq, offset);
            if (Character.isLetterOrDigit(codePoint)) return true;
        }
        return false;
    }

    private void checkOffsetIsValid(int shiftedOffset) {
        if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
            throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
                    ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
                    "]");
    private void checkOffsetIsValid(int offset) {
        if (!(mStart <= offset && offset <= mEnd)) {
            throw new IllegalArgumentException("Invalid offset: " + (offset) +
                    ". Valid range is [" + mStart + ", " + mEnd + "]");
        }
    }
}
+43 −0
Original line number Diff line number Diff line
@@ -48,6 +48,23 @@ public class WordIteratorTest extends AndroidTestCase {
        wordIterator.setCharSequence(text, text.length(), text.length());
    }

    @SmallTest
    public void testWindowWidth() {
        final String text = "aaaa bbbb cccc dddd eeee ffff gggg hhhh iiii jjjj kkkk llll mmmm nnnn";
        WordIterator wordIterator = new WordIterator(Locale.ENGLISH);

        // The first 'n' is more than 50 characters into the string.
        wordIterator.setCharSequence(text, text.indexOf('n'), text.length());
        final int expectedWindowStart = text.indexOf('n') - 50;
        assertEquals(expectedWindowStart, wordIterator.preceding(expectedWindowStart + 1));
        assertEquals(BreakIterator.DONE, wordIterator.preceding(expectedWindowStart));

        wordIterator.setCharSequence(text, 0, 1);
        final int expectedWindowEnd = 1 + 50;
        assertEquals(expectedWindowEnd, wordIterator.following(expectedWindowEnd - 1));
        assertEquals(BreakIterator.DONE, wordIterator.following(expectedWindowEnd));
    }

    @SmallTest
    public void testPreceding() {
        final String text = "abc def-ghi. jkl";
@@ -73,6 +90,19 @@ public class WordIteratorTest extends AndroidTestCase {
        assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
        assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
        assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));

        // The results should be the same even if we set a smaller window, since WordIterator
        // enlarges the window by 50 code units on each side anyway.
        wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));

        assertEquals(BreakIterator.DONE, wordIterator.preceding(text.indexOf('a')));
        assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('c')));
        assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('d')));
        assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('e')));
        assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('g')));
        assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
        assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
        assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));
    }

    @SmallTest
@@ -100,6 +130,19 @@ public class WordIteratorTest extends AndroidTestCase {
        assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
        assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
        assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));

        // The results should be the same even if we set a smaller window, since WordIterator
        // enlarges the window by 50 code units on each side anyway.
        wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));

        assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('a')));
        assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('c')));
        assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('c') + 1));
        assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('d')));
        assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('-')));
        assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
        assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
        assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));
    }

    @SmallTest