Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2035b946 authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Reinstate the shortcut-only attribute" into jb-mr1-dev

parents 27c093fc 72b1c939
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
    // considering performance regression.
    /**
     * Adds a word to {@code mFusionDictionary}, optionally with a single shortcut target.
     *
     * NOTE(review): each {@code mFusionDictionary.add} call below appears twice, once without
     * and once with the trailing {@code isNotAWord} argument. This looks like the before/after
     * pair of a rendered diff -- confirm that only the four-argument form is meant to remain.
     *
     * @param word the word to add.
     * @param shortcutTarget the shortcut target for this word, or null if there is none.
     * @param frequency the frequency handed to the dictionary for the word; it is also passed
     *        to the {@code WeightedString} built for the shortcut target, when there is one.
     */
    protected void addWord(final String word, final String shortcutTarget, final int frequency) {
        if (shortcutTarget == null) {
            // No shortcut: the word is added with a null shortcut list.
            mFusionDictionary.add(word, frequency, null);
            mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
        } else {
            // TODO: Do this in the subclass, with this class taking an arraylist.
            // The dictionary takes a list of targets, so wrap the single target in one.
            final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
            shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
            mFusionDictionary.add(word, frequency, shortcutTargets);
            mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
        }
    }

+21 −5
Original line number Diff line number Diff line
@@ -56,6 +56,8 @@ public class BinaryDictInputOutput {
     * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
     *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
     *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
     *
     * c | IF FLAG_HAS_MULTIPLE_CHARS
     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -154,6 +156,8 @@ public class BinaryDictInputOutput {
    private static final int FLAG_IS_TERMINAL = 0x10;
    private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
    private static final int FLAG_HAS_BIGRAMS = 0x04;
    private static final int FLAG_IS_NOT_A_WORD = 0x02;
    private static final int FLAG_IS_BLACKLISTED = 0x01;

    private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
    private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -779,6 +783,12 @@ public class BinaryDictInputOutput {
            }
            flags |= FLAG_HAS_BIGRAMS;
        }
        if (group.mIsNotAWord) {
            flags |= FLAG_IS_NOT_A_WORD;
        }
        if (group.mIsBlacklistEntry) {
            flags |= FLAG_IS_BLACKLISTED;
        }
        return flags;
    }

@@ -1353,12 +1363,14 @@ public class BinaryDictInputOutput {
                    buffer.position(currentPosition);
                }
                nodeContents.add(
                        new CharGroup(info.mCharacters, shortcutTargets,
                                bigrams, info.mFrequency, children));
                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
                                0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
                                0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
            } else {
                nodeContents.add(
                        new CharGroup(info.mCharacters, shortcutTargets,
                                bigrams, info.mFrequency));
                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
                                0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
                                0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
            }
            groupOffset = info.mEndAddress;
        }
@@ -1574,7 +1586,11 @@ public class BinaryDictInputOutput {
                        0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
        if (null != dict) {
            for (final Word w : dict) {
                newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
                if (w.mIsBlacklistEntry) {
                    newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
                } else {
                    newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
                }
            }
            for (final Word w : dict) {
                // By construction a binary dictionary may not have bigrams pointing to
+58 −18
Original line number Diff line number Diff line
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
        ArrayList<WeightedString> mBigrams;
        int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
        Node mChildren;
        boolean mIsNotAWord; // Only a shortcut
        boolean mIsBlacklistEntry;
        // The two following members to help with binary generation
        int mCachedSize;
        int mCachedAddress;

        /**
         * Creates a character group that has no children ({@code mChildren} is set to null).
         * Every argument is stored as-is in the matching member; the lists are not copied.
         *
         * NOTE(review): the {@code bigrams}/{@code frequency} parameter line appears twice
         * below (old and new signature of a rendered diff) -- confirm which one applies.
         *
         * @param chars the characters of this group.
         * @param shortcutTargets the shortcut targets to store for this group.
         * @param bigrams the bigrams to store for this group.
         * @param frequency the frequency; per the member comment, NOT_A_TERMINAL here means
         *        this group is not a terminal.
         * @param isNotAWord true if this entry is only a shortcut and not a word.
         * @param isBlacklistEntry true if this entry is a blacklist entry.
         */
        public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
                final ArrayList<WeightedString> bigrams, final int frequency) {
                final ArrayList<WeightedString> bigrams, final int frequency,
                final boolean isNotAWord, final boolean isBlacklistEntry) {
            mChars = chars;
            mFrequency = frequency;
            mShortcutTargets = shortcutTargets;
            mBigrams = bigrams;
            mChildren = null;
            mIsNotAWord = isNotAWord;
            mIsBlacklistEntry = isBlacklistEntry;
        }

        /**
         * Creates a character group with the given children node.
         * Every argument is stored as-is in the matching member; the lists are not copied.
         *
         * NOTE(review): the {@code bigrams}/{@code frequency} parameter line appears twice
         * below (old and new signature of a rendered diff) -- confirm which one applies.
         *
         * @param chars the characters of this group.
         * @param shortcutTargets the shortcut targets to store for this group.
         * @param bigrams the bigrams to store for this group.
         * @param frequency the frequency; per the member comment, NOT_A_TERMINAL here means
         *        this group is not a terminal.
         * @param isNotAWord true if this entry is only a shortcut and not a word.
         * @param isBlacklistEntry true if this entry is a blacklist entry.
         * @param children the node stored in {@code mChildren}.
         */
        public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
                final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
                final ArrayList<WeightedString> bigrams, final int frequency,
                final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
            mChars = chars;
            mFrequency = frequency;
            mShortcutTargets = shortcutTargets;
            mBigrams = bigrams;
            mChildren = children;
            mIsNotAWord = isNotAWord;
            mIsBlacklistEntry = isBlacklistEntry;
        }

        public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
         * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
         * updated if they are higher than the existing ones.
         */
        public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
                ArrayList<WeightedString> bigrams) {
        public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
                final ArrayList<WeightedString> bigrams,
                final boolean isNotAWord, final boolean isBlacklistEntry) {
            if (frequency > mFrequency) {
                mFrequency = frequency;
            }
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
                    }
                }
            }
            mIsNotAWord = isNotAWord;
            mIsBlacklistEntry = isBlacklistEntry;
        }
    }

@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
     * @param word the word to add.
     * @param frequency the frequency of the word, in the range [0..255].
     * @param shortcutTargets a list of shortcut targets for this word, or null.
     * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
     */
    public void add(final String word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets) {
        add(getCodePoints(word), frequency, shortcutTargets);
            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
        // Convert the string to code points and delegate to the int[] overload. Entries added
        // through this helper are never blacklist entries.
        add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
                false /* isBlacklistEntry */);
    }

    /**
     * Registers a word, given as a string, as a blacklist entry of this dictionary.
     *
     * The string is converted to code points and handed to the code-point based add method
     * with a frequency of 0 and the blacklist flag set.
     *
     * @param word the word to blacklist.
     * @param shortcutTargets the shortcut targets attached to this word, or null.
     * @param isNotAWord true if this is not a word for spellchecking purposes (e.g. it is
     *        only a shortcut).
     */
    public void addBlacklistEntry(final String word,
            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
        final int[] codePoints = getCodePoints(word);
        final int blacklistFrequency = 0;
        final boolean isBlacklistEntry = true;
        add(codePoints, blacklistFrequency, shortcutTargets, isNotAWord, isBlacklistEntry);
    }

    /**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
        if (charGroup != null) {
            final CharGroup charGroup2 = findWordInTree(mRoot, word2);
            if (charGroup2 == null) {
                add(getCodePoints(word2), 0, null);
                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
                        false /* isBlacklistEntry */);
            }
            charGroup.addBigram(word2, frequency);
        } else {
@@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
     * @param word the word, as an int array.
     * @param frequency the frequency of the word, in the range [0..255].
     * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
     * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
     * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
     */
    private void add(final int[] word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets) {
            final ArrayList<WeightedString> shortcutTargets,
            final boolean isNotAWord, final boolean isBlacklistEntry) {
        assert(frequency >= 0 && frequency <= 255);
        Node currentNode = mRoot;
        int charIndex = 0;
@@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
            final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
            final CharGroup newGroup = new CharGroup(
                    Arrays.copyOfRange(word, charIndex, word.length),
                    shortcutTargets, null /* bigrams */, frequency);
                    shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
            currentNode.mData.add(insertionIndex, newGroup);
            if (DBG) checkStack(currentNode);
        } else {
@@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
                    // The new word is a prefix of an existing word, but the node on which it
                    // should end already exists as is. Since the old CharGroup was not a terminal,
                    // make it one by filling in its frequency and other attributes.
                    currentGroup.update(frequency, shortcutTargets, null);
                    currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
                            isBlacklistEntry);
                } else {
                    // The new word matches the full old word and extends past it.
                    // We only have to create a new node and add it to the end of this.
                    final CharGroup newNode = new CharGroup(
                            Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
                                    shortcutTargets, null /* bigrams */, frequency);
                                    shortcutTargets, null /* bigrams */, frequency, isNotAWord,
                                    isBlacklistEntry);
                    currentGroup.mChildren = new Node();
                    currentGroup.mChildren.mData.add(newNode);
                }
@@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
                if (0 == differentCharIndex) {
                    // Exact same word. Update the frequency if higher. This will also add the
                    // new shortcuts to the existing shortcut list if it already exists.
                    currentGroup.update(frequency, shortcutTargets, null);
                    currentGroup.update(frequency, shortcutTargets, null,
                            currentGroup.mIsNotAWord && isNotAWord,
                            currentGroup.mIsBlacklistEntry || isBlacklistEntry);
                } else {
                    // Partial prefix match only. We have to replace the current node with a node
                    // containing the current prefix and create two new ones for the tails.
@@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
                    final CharGroup newOldWord = new CharGroup(
                            Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
                                    currentGroup.mChars.length), currentGroup.mShortcutTargets,
                            currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
                            currentGroup.mBigrams, currentGroup.mFrequency,
                            currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
                            currentGroup.mChildren);
                    newChildren.mData.add(newOldWord);

                    final CharGroup newParent;
                    if (charIndex + differentCharIndex >= word.length) {
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
                                shortcutTargets, null /* bigrams */, frequency, newChildren);
                                shortcutTargets, null /* bigrams */, frequency,
                                isNotAWord, isBlacklistEntry, newChildren);
                    } else {
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
                                null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
                                null /* shortcutTargets */, null /* bigrams */, -1, 
                                false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
                        final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
                                charIndex + differentCharIndex, word.length),
                                shortcutTargets, null /* bigrams */, frequency);
                                shortcutTargets, null /* bigrams */, frequency,
                                isNotAWord, isBlacklistEntry);
                        final int addIndex = word[charIndex + differentCharIndex]
                                > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                        newChildren.mData.add(addIndex, newWord);
@@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
    private static int findInsertionIndex(final Node node, int character) {
        final ArrayList<CharGroup> data = node.mData;
        // Build a throwaway single-character group to serve as the search key.
        // NOTE(review): the argument line below appears twice (old and new constructor
        // signature of a rendered diff) -- confirm which one applies.
        final CharGroup reference = new CharGroup(new int[] { character },
                null /* shortcutTargets */, null /* bigrams */, 0);
                null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
                false /* isBlacklistEntry */);
        // binarySearch requires the list to be ordered by the comparator it is given;
        // presumably node.mData is kept sorted by CHARGROUP_COMPARATOR -- TODO confirm.
        int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
        // A non-negative result is the index of a match. A negative result encodes
        // -(insertion point) - 1, so undo that encoding to get the insertion point.
        return result >= 0 ? result : -result - 1;
    }
@@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
                    }
                    if (currentGroup.mFrequency >= 0)
                        return new Word(mCurrentString.toString(), currentGroup.mFrequency,
                                currentGroup.mShortcutTargets, currentGroup.mBigrams);
                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
                                currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
                } else {
                    mPositions.removeLast();
                    currentPos = mPositions.getLast();
+12 −3
Original line number Diff line number Diff line
@@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
    public final int mFrequency;
    public final ArrayList<WeightedString> mShortcutTargets;
    public final ArrayList<WeightedString> mBigrams;
    public final boolean mIsNotAWord;
    public final boolean mIsBlacklistEntry;

    private int mHashCode = 0;

    /**
     * Creates a word from its attributes. Each argument is stored as-is in the matching
     * final field; the lists are not copied.
     *
     * NOTE(review): the {@code bigrams} parameter line appears twice below (old and new
     * signature of a rendered diff) -- confirm which one applies.
     * NOTE(review): computeHashCode and equals in this class call methods on
     * {@code mShortcutTargets} and {@code mBigrams}, so null lists passed here would fail
     * there -- confirm callers always pass non-null lists.
     *
     * @param word the word string.
     * @param frequency the frequency of the word.
     * @param shortcutTargets the shortcut targets of the word.
     * @param bigrams the bigrams of the word.
     * @param isNotAWord stored in {@code mIsNotAWord}.
     * @param isBlacklistEntry stored in {@code mIsBlacklistEntry}.
     */
    public Word(final String word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<WeightedString> bigrams) {
            final ArrayList<WeightedString> bigrams,
            final boolean isNotAWord, final boolean isBlacklistEntry) {
        mWord = word;
        mFrequency = frequency;
        mShortcutTargets = shortcutTargets;
        mBigrams = bigrams;
        mIsNotAWord = isNotAWord;
        mIsBlacklistEntry = isBlacklistEntry;
    }

    private static int computeHashCode(Word word) {
@@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
                word.mWord,
                word.mFrequency,
                word.mShortcutTargets.hashCode(),
                word.mBigrams.hashCode()
                word.mBigrams.hashCode(),
                word.mIsNotAWord,
                word.mIsBlacklistEntry
        });
    }

@@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
        Word w = (Word)o;
        return mFrequency == w.mFrequency && mWord.equals(w.mWord)
                && mShortcutTargets.equals(w.mShortcutTargets)
                && mBigrams.equals(w.mBigrams);
                && mBigrams.equals(w.mBigrams)
                && mIsNotAWord == w.mIsNotAWord
                && mIsBlacklistEntry == w.mIsBlacklistEntry;
    }

    @Override
+4 −0
Original line number Diff line number Diff line
@@ -43,6 +43,10 @@ class BinaryFormat {
    static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
    // Flag for bigram presence
    static const int FLAG_HAS_BIGRAMS = 0x04;
    // Flag for non-words (typically, shortcut only entries)
    static const int FLAG_IS_NOT_A_WORD = 0x02;
    // Flag for blacklist
    static const int FLAG_IS_BLACKLISTED = 0x01;

    // Attribute (bigram/shortcut) related flags:
    // Flag for presence of more attributes
Loading