Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 342d5d5d authored by Jean Chalard's avatar Jean Chalard
Browse files

Wire the Xml-read shortcuts into the dict creation code (B6)

Change-Id: I352064835abb62c294b48e080d9709ff013c7bb0
parent 8edd3067
Loading
Loading
Loading
Loading
+16 −6
Original line number Diff line number Diff line
@@ -39,11 +39,13 @@ public class DictionaryMaker {
        private final static String OPTION_VERSION_2 = "-2";
        private final static String OPTION_INPUT_SOURCE = "-s";
        private final static String OPTION_INPUT_BIGRAM_XML = "-b";
        private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
        private final static String OPTION_OUTPUT_BINARY = "-d";
        private final static String OPTION_OUTPUT_XML = "-x";
        private final static String OPTION_HELP = "-h";
        public final String mInputBinary;
        public final String mInputUnigramXml;
        public final String mInputShortcutXml;
        public final String mInputBigramXml;
        public final String mOutputBinary;
        public final String mOutputXml;
@@ -72,7 +74,8 @@ public class DictionaryMaker {

        private void displayHelp() {
            MakedictLog.i("Usage: makedict "
                    + "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
                    + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
                    + "| -s <binary input>] "
                    + "[-d <binary output>] [-x <xml output>] [-2]\n"
                    + "\n"
                    + "  Converts a source dictionary file to one or several outputs.\n"
@@ -90,6 +93,7 @@ public class DictionaryMaker {
            }
            String inputBinary = null;
            String inputUnigramXml = null;
            String inputShortcutXml = null;
            String inputBigramXml = null;
            String outputBinary = null;
            String outputXml = null;
@@ -116,6 +120,8 @@ public class DictionaryMaker {
                            } else {
                                inputUnigramXml = filename;
                            }
                        } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
                            inputShortcutXml = filename;
                        } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
                            inputBigramXml = filename;
                        } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
@@ -143,6 +149,7 @@ public class DictionaryMaker {

            mInputBinary = inputBinary;
            mInputUnigramXml = inputUnigramXml;
            mInputShortcutXml = inputShortcutXml;
            mInputBigramXml = inputBigramXml;
            mOutputBinary = outputBinary;
            mOutputXml = outputXml;
@@ -170,7 +177,7 @@ public class DictionaryMaker {
        if (null != args.mInputBinary) {
            return readBinaryFile(args.mInputBinary);
        } else if (null != args.mInputUnigramXml) {
            return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml);
            return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml);
        } else {
            throw new RuntimeException("No input file specified");
        }
@@ -195,6 +202,7 @@ public class DictionaryMaker {
     * Read a dictionary from a unigram XML file, and optionally shortcut and bigram XML files.
     *
     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
     * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none.
     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
     * @return the read dictionary.
     * @throws FileNotFoundException if one of the files can't be found
@@ -203,12 +211,14 @@ public class DictionaryMaker {
     * @throws ParserConfigurationException if the system can't create a SAX parser
     */
    private static FusionDictionary readXmlFile(final String unigramXmlFilename,
            final String bigramXmlFilename) throws FileNotFoundException, SAXException,
            IOException, ParserConfigurationException {
            final String shortcutXmlFilename, final String bigramXmlFilename)
            throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
        final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
        final FileInputStream shortcuts = null == shortcutXmlFilename ? null :
                new FileInputStream(new File(shortcutXmlFilename));
        final FileInputStream bigrams = null == bigramXmlFilename ? null :
                new FileInputStream(new File(bigramXmlFilename));
        return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams);
        return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
    }

    /**
+15 −6
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ public class XmlDictInputOutput {
        int mState; // the state of the parser
        int mFreq; // the currently read freq
        String mWord; // the current word
        final HashMap<String, ArrayList<WeightedString>> mShortcutsMap;
        final HashMap<String, ArrayList<WeightedString>> mBigramsMap;

        /**
@@ -69,9 +70,11 @@ public class XmlDictInputOutput {
         * @param dict the dictionary to construct.
         * @param shortcuts the shortcuts as a map. This may not be null.
         * @param bigrams the bigrams as a map. This may be empty, but may not be null.
         */
        public UnigramHandler(FusionDictionary dict,
                HashMap<String, ArrayList<WeightedString>> bigrams) {
        public UnigramHandler(final FusionDictionary dict,
                final HashMap<String, ArrayList<WeightedString>> shortcuts,
                final HashMap<String, ArrayList<WeightedString>> bigrams) {
            mDictionary = dict;
            mShortcutsMap = shortcuts;
            mBigramsMap = bigrams;
            mWord = "";
            mState = START;
@@ -107,8 +110,7 @@ public class XmlDictInputOutput {
        @Override
        public void endElement(String uri, String localName, String qName) {
            // When an element closes while the parser is in the WORD state, the current word
            // is added to the dictionary exactly once, along with whatever the shortcut and
            // bigram maps hold for it. HashMap#get yields null for a word with no entry.
            if (WORD == mState) {
                mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord));
                // Go back to the initial state so the next element starts a fresh entry.
                mState = START;
            }
        }
@@ -208,9 +210,12 @@ public class XmlDictInputOutput {
     * representation.
     *
     * @param unigrams the file to read the data from.
     * @param shortcuts the file to read the shortcuts from, or null.
     * @param bigrams the file to read the bigrams from, or null.
     * @return the in-memory representation of the dictionary.
     */
    public static FusionDictionary readDictionaryXml(InputStream unigrams, InputStream bigrams)
    public static FusionDictionary readDictionaryXml(final InputStream unigrams,
            final InputStream shortcuts, final InputStream bigrams)
            throws SAXException, IOException, ParserConfigurationException {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
@@ -218,9 +223,13 @@ public class XmlDictInputOutput {
        final BigramHandler bigramHandler = new BigramHandler();
        if (null != bigrams) parser.parse(bigrams, bigramHandler);

        final ShortcutHandler shortcutHandler = new ShortcutHandler();
        if (null != shortcuts) parser.parse(shortcuts, shortcutHandler);

        final FusionDictionary dict = new FusionDictionary();
        final UnigramHandler unigramHandler =
                new UnigramHandler(dict, bigramHandler.getBigramMap());
                new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
                        bigramHandler.getBigramMap());
        parser.parse(unigrams, unigramHandler);
        return dict;
    }