Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c8399997 authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Actually add shortcut-only entries."

parents 382f44f3 c599f2e9
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -606,7 +606,9 @@ public class BinaryDictInputOutput {
            }
            flags |= FLAG_HAS_BIGRAMS;
        }
        // TODO: fill in the FLAG_IS_SHORTCUT_ONLY
        if (group.mIsShortcutOnly) {
            flags |= FLAG_IS_SHORTCUT_ONLY;
        }
        return flags;
    }

+50 −22
Original line number Diff line number Diff line
@@ -170,6 +170,24 @@ public class FusionDictionary implements Iterable<Word> {
        return array;
    }

    /**
     * Inserts every word of a list that is not in the dictionary yet as a 0-frequency entry.
     *
     * This is used for shortcut targets and bigrams that refer to words the dictionary does
     * not contain yet. Such a word may be added again later with an actual frequency; the
     * private version of add() handles that case.
     *
     * @param words the words to insert. May be null, in which case nothing is done.
     */
    private void addNeutralWords(final ArrayList<WeightedString> words) {
        if (null == words) {
            return;
        }
        for (final WeightedString candidate : words) {
            // Leave words that are already present in the tree untouched.
            if (null != findWordInTree(mRoot, candidate.mWord)) {
                continue;
            }
            add(getCodePoints(candidate.mWord), 0, null, null, false /* isShortcutOnly */);
        }
    }

    /**
     * Helper method to add a word as a string.
     *
@@ -186,22 +204,12 @@ public class FusionDictionary implements Iterable<Word> {
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<WeightedString> bigrams) {
        if (null != shortcutTargets) {
            for (WeightedString target : shortcutTargets) {
                final CharGroup t = findWordInTree(mRoot, target.mWord);
                if (null == t) {
                    add(getCodePoints(target.mWord), 0, null, null);
                }
            }
            addNeutralWords(shortcutTargets);
        }
        if (null != bigrams) {
            for (WeightedString bigram : bigrams) {
                final CharGroup t = findWordInTree(mRoot, bigram.mWord);
                if (null == t) {
                    add(getCodePoints(bigram.mWord), 0, null, null);
                }
            }
            addNeutralWords(bigrams);
        }
        add(getCodePoints(word), frequency, shortcutTargets, bigrams);
        add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */);
    }

    /**
@@ -222,6 +230,22 @@ public class FusionDictionary implements Iterable<Word> {
        }
    }

    /**
     * Helper method to add a shortcut that should not be a dictionary word.
     *
     * Any shortcut target that is not in the dictionary yet is first added as a 0-frequency
     * entry, then the shortcut itself is added with the shortcut-only flag set.
     *
     * @param word the word to add.
     * @param frequency the frequency of the word, in the range [0..255].
     * @param shortcutTargets a list of shortcut targets. Must not be null.
     * @throws IllegalArgumentException if shortcutTargets is null.
     */
    public void addShortcutOnly(final String word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets) {
        if (null == shortcutTargets) {
            // A shortcut-only entry is meaningless without targets: reject it as a bad
            // argument. IllegalArgumentException is a RuntimeException, so callers that
            // catch the broader type keep working.
            throw new IllegalArgumentException("Can't add a shortcut without targets");
        }
        addNeutralWords(shortcutTargets);
        add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */);
    }

    /**
     * Add a word to this dictionary.
     *
@@ -232,10 +256,12 @@ public class FusionDictionary implements Iterable<Word> {
     * @param frequency the frequency of the word, in the range [0..255].
     * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
     * @param bigrams an optional list of bigrams for this word (null if none).
     * @param isShortcutOnly whether this should be a shortcut only.
     */
    private void add(final int[] word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<WeightedString> bigrams) {
            final ArrayList<WeightedString> bigrams,
            final boolean isShortcutOnly) {
        assert(frequency >= 0 && frequency <= 255);
        Node currentNode = mRoot;
        int charIndex = 0;
@@ -260,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> {
            final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
            final CharGroup newGroup = new CharGroup(
                    Arrays.copyOfRange(word, charIndex, word.length),
                    shortcutTargets, bigrams, frequency, false /* isShortcutOnly */);
                    shortcutTargets, bigrams, frequency, isShortcutOnly);
            currentNode.mData.add(insertionIndex, newGroup);
            checkStack(currentNode);
        } else {
@@ -275,7 +301,7 @@ public class FusionDictionary implements Iterable<Word> {
                    } else {
                        final CharGroup newNode = new CharGroup(currentGroup.mChars,
                                shortcutTargets, bigrams, frequency, currentGroup.mChildren,
                                false /* isShortcutOnly */);
                                isShortcutOnly);
                        currentNode.mData.set(nodeIndex, newNode);
                        checkStack(currentNode);
                    }
@@ -284,8 +310,7 @@ public class FusionDictionary implements Iterable<Word> {
                    // We only have to create a new node and add it to the end of this.
                    final CharGroup newNode = new CharGroup(
                            Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
                                    shortcutTargets, bigrams, frequency,
                                    false /* isShortcutOnly */);
                                    shortcutTargets, bigrams, frequency, isShortcutOnly);
                    currentGroup.mChildren = new Node();
                    currentGroup.mChildren.mData.add(newNode);
                }
@@ -300,7 +325,8 @@ public class FusionDictionary implements Iterable<Word> {
                        }
                        final CharGroup newGroup = new CharGroup(word,
                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
                                frequency, currentGroup.mChildren, false /* isShortcutOnly */);
                                frequency, currentGroup.mChildren,
                                currentGroup.mIsShortcutOnly && isShortcutOnly);
                        currentNode.mData.set(nodeIndex, newGroup);
                    }
                } else {
@@ -318,16 +344,18 @@ public class FusionDictionary implements Iterable<Word> {
                    if (charIndex + differentCharIndex >= word.length) {
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
                                shortcutTargets, bigrams, frequency, newChildren,
                                false /* isShortcutOnly */);
                                shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly);
                    } else {
                        // isShortcutOnly makes no sense for non-terminal nodes. The following node
                        // is non-terminal (frequency -1 in FusionDictionary representation) so we
                        // pass false for isShortcutOnly
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
                                null, null, -1, newChildren, false /* isShortcutOnly */);
                        final CharGroup newWord = new CharGroup(
                                Arrays.copyOfRange(word, charIndex + differentCharIndex,
                                        word.length), shortcutTargets, bigrams, frequency,
                                        false /* isShortcutOnly */);
                                        isShortcutOnly);
                        final int addIndex = word[charIndex + differentCharIndex]
                                > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                        newChildren.mData.add(addIndex, newWord);
+11 −0
Original line number Diff line number Diff line
@@ -47,6 +47,8 @@ public class XmlDictInputOutput {
    private static final String WORD_ATTR = "word";
    private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";

    private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;

    /**
     * SAX handler for a unigram XML file.
     */
@@ -233,6 +235,15 @@ public class XmlDictInputOutput {
                new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
                        bigramHandler.getBigramMap());
        parser.parse(unigrams, unigramHandler);

        final HashMap<String, ArrayList<WeightedString>> shortcutMap =
                shortcutHandler.getShortcutMap();
        for (final String shortcut : shortcutMap.keySet()) {
            if (dict.hasWord(shortcut)) continue;
            // TODO: list a frequency in the shortcut file and use it here, instead of
            // a constant freq
            dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
        }
        return dict;
    }