Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 31b90a36 authored by Jean Chalard's avatar Jean Chalard
Browse files

Support a syntax to add whitelist entries in shortcuts

Bug: 6906525
Change-Id: Ie33586c07973bf76e38390766175ba424d72d655
parent 5bb42062
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ public class DictionaryMaker {

        public static String getHelp() {
            return "Usage: makedict "
                    + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
                    + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
                    + "| -s <binary input>] [-d <binary output format version 2>] "
                    + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
                    + "\n"
@@ -246,7 +246,7 @@ public class DictionaryMaker {
     * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
     *
     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
     * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none.
     * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none.
     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
     * @return the read dictionary.
     * @throws FileNotFoundException if one of the files can't be found
+34 −11
Original line number Diff line number Diff line
@@ -179,7 +179,7 @@ public class XmlDictInputOutput {
                mSrc = attrs.getValue(uri, SRC_ATTRIBUTE);
            } else if (DST_TAG.equals(localName)) {
                String dst = attrs.getValue(uri, DST_ATTRIBUTE);
                int freq = Integer.parseInt(attrs.getValue(uri, DST_FREQ));
                int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ));
                WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
                ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
                if (null == bigramList) bigramList = new ArrayList<WeightedString>();
@@ -188,6 +188,10 @@ public class XmlDictInputOutput {
            }
        }

        /**
         * Converts the frequency string read from an XML attribute into an integer.
         *
         * This is a protected hook so that subclasses can accept values other than
         * plain decimal integers.
         *
         * @param freqString the string to interpret as a decimal integer.
         * @return the integer value of the string.
         * @throws NumberFormatException if the string is not a parsable integer.
         */
        protected int getValueFromFreqString(final String freqString) {
            final int parsedFreq = Integer.parseInt(freqString, 10);
            return parsedFreq;
        }

        // This may return an empty map, but will never return null.
        public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
            return mAssocMap;
@@ -216,22 +220,40 @@ public class XmlDictInputOutput {
    }

    /**
     * SAX handler for a shortcut XML file.
     * SAX handler for a shortcut and whitelist XML file.
     */
    static private class ShortcutHandler extends AssociativeListHandler {
    static private class ShortcutAndWhitelistHandler extends AssociativeListHandler {
        private final static String ENTRY_TAG = "entry";
        private final static String ENTRY_ATTRIBUTE = "shortcut";
        private final static String TARGET_TAG = "target";
        private final static String REPLACEMENT_ATTRIBUTE = "replacement";
        private final static String TARGET_PRIORITY_ATTRIBUTE = "priority";
        private final static String WHITELIST_MARKER = "whitelist";
        private final static int WHITELIST_FREQ_VALUE = 15;
        private final static int MIN_FREQ = 0;
        private final static int MAX_FREQ = 14;

        public ShortcutHandler() {
        public ShortcutAndWhitelistHandler() {
            super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE,
                    TARGET_PRIORITY_ATTRIBUTE);
        }

        /**
         * Converts the frequency string of a shortcut target into an integer.
         *
         * The special marker string {@code WHITELIST_MARKER} maps to
         * {@code WHITELIST_FREQ_VALUE}. Any other string is parsed as an integer by the
         * superclass and must lie within {@code MIN_FREQ}..{@code MAX_FREQ} inclusive.
         *
         * @param freqString the frequency string, either the whitelist marker or an integer.
         * @return the frequency value to use for this target.
         * @throws NumberFormatException if the string is neither the marker nor an integer.
         * @throws IllegalArgumentException if the parsed integer is outside the accepted range.
         */
        @Override
        protected int getValueFromFreqString(final String freqString) {
            if (WHITELIST_MARKER.equals(freqString)) {
                return WHITELIST_FREQ_VALUE;
            }
            final int intValue = super.getValueFromFreqString(freqString);
            if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
                // IllegalArgumentException is still a RuntimeException, so existing catch
                // sites keep working; the offending value is reported to ease debugging.
                throw new IllegalArgumentException("Shortcut freq out of range. Accepted range is "
                        + MIN_FREQ + ".." + MAX_FREQ + ", got " + intValue);
            }
            return intValue;
        }

        // As per getAssocMap(), this never returns null.
        public HashMap<String, ArrayList<WeightedString>> getShortcutMap() {
        public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
            return getAssocMap();
        }
    }
@@ -243,7 +265,7 @@ public class XmlDictInputOutput {
     * representation.
     *
     * @param unigrams the file to read the data from.
     * @param shortcuts the file to read the shortcuts from, or null.
     * @param shortcuts the file to read the shortcuts and whitelist from, or null.
     * @param bigrams the file to read the bigrams from, or null.
     * @return the in-memory representation of the dictionary.
     */
@@ -256,11 +278,12 @@ public class XmlDictInputOutput {
        final BigramHandler bigramHandler = new BigramHandler();
        if (null != bigrams) parser.parse(bigrams, bigramHandler);

        final ShortcutHandler shortcutHandler = new ShortcutHandler();
        if (null != shortcuts) parser.parse(shortcuts, shortcutHandler);
        final ShortcutAndWhitelistHandler shortcutAndWhitelistHandler =
                new ShortcutAndWhitelistHandler();
        if (null != shortcuts) parser.parse(shortcuts, shortcutAndWhitelistHandler);

        final UnigramHandler unigramHandler =
                new UnigramHandler(shortcutHandler.getShortcutMap());
                new UnigramHandler(shortcutAndWhitelistHandler.getShortcutAndWhitelistMap());
        parser.parse(unigrams, unigramHandler);
        final FusionDictionary dict = unigramHandler.getFinalDictionary();
        final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap();
@@ -280,7 +303,7 @@ public class XmlDictInputOutput {
     *
     * This method reads data from the parser and creates a new FusionDictionary with it.
     * The format parsed by this method is the format used before Ice Cream Sandwich,
     * which has no support for bigrams or shortcuts.
     * which has no support for bigrams or shortcuts/whitelist.
     * It is important to note that this method expects the parser to have already eaten
     * the first, all-encompassing tag.
     *
@@ -291,7 +314,7 @@ public class XmlDictInputOutput {
    /**
     * Writes a dictionary to an XML file.
     *
     * The output format is the "second" format, which supports bigrams and shortcuts.
     * The output format is the "second" format, which supports bigrams and shortcuts/whitelist.
     *
     * @param destination a destination stream to write to.
     * @param dict the dictionary to write.