Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b24de426 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Use CombinedFormatUtils to convert dict elements to strings.

Bug: 11281877
Bug: 12810574
Change-Id: Ib631f75eab73abc9877a7698171c45e8f2fc7600
parent 5f5feeba
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;

import java.util.Arrays;

@@ -57,8 +57,7 @@ public final class ProbabilityInfo {

    @Override
    public String toString() {
        return "f=" + mProbability + (hasHistoricalInfo() ?
                ",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
        return CombinedFormatUtils.formatProbabilityInfo(this);
    }

    @Override
+4 −29
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
@@ -52,8 +53,8 @@ public final class WordProperty implements Comparable<WordProperty> {
        mBigrams = bigrams;
        mIsNotAWord = isNotAWord;
        mIsBlacklistEntry = isBlacklistEntry;
        mHasBigrams = !bigrams.isEmpty();
        mHasShortcuts = !shortcutTargets.isEmpty();
        mHasBigrams = bigrams != null && !bigrams.isEmpty();
        mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
    }

    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
@@ -158,32 +159,6 @@ public final class WordProperty implements Comparable<WordProperty> {

    @Override
    public String toString() {
        // TODO: Move this logic to CombinedInputOutput.
        final StringBuffer builder = new StringBuffer();
        builder.append(" word=" + mWord);
        builder.append(",");
        builder.append(mProbabilityInfo.toString());
        if (mIsNotAWord) {
            builder.append(",");
            builder.append("not_a_word=true");
        }
        if (mIsBlacklistEntry) {
            builder.append(",");
            builder.append("blacklisted=true");
        }
        builder.append("\n");
        for (int i = 0; i < mBigrams.size(); i++) {
            builder.append("  bigram=" + mBigrams.get(i).mWord);
            builder.append(",");
            builder.append(mBigrams.get(i).mProbabilityInfo.toString());
            builder.append("\n");
        }
        for (int i = 0; i < mShortcutTargets.size(); i++) {
            builder.append("  shortcut=" + mShortcutTargets.get(i).mWord);
            builder.append(",");
            builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
            builder.append("\n");
        }
        return builder.toString();
        return CombinedFormatUtils.formatWordProperty(this);
    }
}
+99 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.utils;

import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.util.HashMap;

public class CombinedFormatUtils {
    public static final String DICTIONARY_TAG = "dictionary";
    public static final String BIGRAM_TAG = "bigram";
    public static final String SHORTCUT_TAG = "shortcut";
    public static final String PROBABILITY_TAG = "f";
    public static final String HISTORICAL_INFO_TAG = "historicalInfo";
    public static final String HISTORICAL_INFO_SEPARATOR = ":";
    public static final String WORD_TAG = "word";
    public static final String NOT_A_WORD_TAG = "not_a_word";
    public static final String BLACKLISTED_TAG = "blacklisted";

    public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
        final StringBuilder builder = new StringBuilder();
        builder.append(DICTIONARY_TAG + "=");
        if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) {
            builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY));
        }
        for (final String key : attributeMap.keySet()) {
            if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) {
                continue;
            }
            final String value = attributeMap.get(key);
            builder.append("," + key + "=" + value);
        }
        builder.append("\n");
        return builder.toString();
    }

    public static String formatWordProperty(final WordProperty wordProperty) {
        final StringBuilder builder = new StringBuilder();
        builder.append(" " + WORD_TAG + "=" + wordProperty.mWord);
        builder.append(",");
        builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
        if (wordProperty.mIsNotAWord) {
            builder.append("," + NOT_A_WORD_TAG + "=true");
        }
        if (wordProperty.mIsBlacklistEntry) {
            builder.append("," + BLACKLISTED_TAG + "=true");
        }
        builder.append("\n");
        if (wordProperty.mShortcutTargets != null) {
            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                builder.append("  " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
                builder.append(",");
                builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo));
                builder.append("\n");
            }
        }
        if (wordProperty.mBigrams != null) {
            for (final WeightedString bigram : wordProperty.mBigrams) {
                builder.append("  " + BIGRAM_TAG + "=" + bigram.mWord);
                builder.append(",");
                builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
                builder.append("\n");
            }
        }
        return builder.toString();
    }

    public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) {
        final StringBuilder builder = new StringBuilder();
        builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability);
        if (probabilityInfo.hasHistoricalInfo()) {
            builder.append(",");
            builder.append(HISTORICAL_INFO_TAG + "=");
            builder.append(probabilityInfo.mTimestamp);
            builder.append(HISTORICAL_INFO_SEPARATOR);
            builder.append(probabilityInfo.mLevel);
            builder.append(HISTORICAL_INFO_SEPARATOR);
            builder.append(probabilityInfo.mCount);
        }
        return builder.toString();
    }
}
+1 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
+34 −48
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;

import java.io.BufferedReader;
import java.io.File;
@@ -41,16 +42,10 @@ import java.util.TreeSet;
 * All functions in this class are static.
 */
public class CombinedInputOutput {

    private static final String DICTIONARY_TAG = "dictionary";
    private static final String BIGRAM_TAG = "bigram";
    private static final String SHORTCUT_TAG = "shortcut";
    private static final String PROBABILITY_TAG = "f";
    private static final String WORD_TAG = "word";
    private static final String NOT_A_WORD_TAG = "not_a_word";
    private static final String WHITELIST_TAG = "whitelist";
    private static final String OPTIONS_TAG = "options";
    private static final String COMMENT_LINE_STARTER = "#";
    private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;

    /**
     * Basic test to find out whether the file is in the combined format or not.
@@ -68,7 +63,8 @@ public class CombinedInputOutput {
            while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
                firstLine = reader.readLine();
            }
            return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
            return firstLine.matches(
                    "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
        } catch (FileNotFoundException e) {
            return false;
        } catch (IOException e) {
@@ -123,7 +119,7 @@ public class CombinedInputOutput {
        while (null != (line = reader.readLine())) {
            if (line.startsWith(COMMENT_LINE_STARTER)) continue;
            final String args[] = line.trim().split(",");
            if (args[0].matches(WORD_TAG + "=.*")) {
            if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
                if (null != word) {
                    dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
                    for (WeightedString s : bigrams) {
@@ -136,23 +132,30 @@ public class CombinedInputOutput {
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (WORD_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
                        word = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        freq = Integer.parseInt(params[1]);
                    } else if (NOT_A_WORD_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
                        final String[] historicalInfoParams =
                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
                            throw new RuntimeException("Wrong format (historical info) : " + line);
                        }
                        // TODO: Use parsed historical info.
                    } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
                        isNotAWord = "true".equals(params[1]);
                    }
                }
            } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
            } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
                String shortcut = null;
                int shortcutFreq = 0;
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (SHORTCUT_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
                        shortcut = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        shortcutFreq = WHITELIST_TAG.equals(params[1])
                                ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
                                : Integer.parseInt(params[1]);
@@ -163,16 +166,23 @@ public class CombinedInputOutput {
                } else {
                    throw new RuntimeException("Wrong format : " + line);
                }
            } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
            } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
                String secondWordOfBigram = null;
                int bigramFreq = 0;
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (BIGRAM_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
                        secondWordOfBigram = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        bigramFreq = Integer.parseInt(params[1]);
                    }  else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
                        final String[] historicalInfoParams =
                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
                            throw new RuntimeException("Wrong format (historical info) : " + line);
                        }
                        // TODO: Use parsed historical info.
                    }
                }
                if (null != secondWordOfBigram) {
@@ -198,40 +208,16 @@ public class CombinedInputOutput {
     * @param destination a destination stream to write to.
     * @param dict the dictionary to write.
     */
    public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
            throws IOException {
    public static void writeDictionaryCombined(
            final Writer destination, final FusionDictionary dict) throws IOException {
        final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
        for (WordProperty wordProperty: dict) {
        for (final WordProperty wordProperty : dict) {
            // This for ordering by frequency, then by asciibetic order
            wordPropertiesInDict.add(wordProperty);
        }
        final HashMap<String, String> options = dict.mOptions.mAttributes;
        destination.write(DICTIONARY_TAG + "=");
        if (options.containsKey(DICTIONARY_TAG)) {
            destination.write(options.get(DICTIONARY_TAG));
            options.remove(DICTIONARY_TAG);
        }
        for (final String key : dict.mOptions.mAttributes.keySet()) {
            final String value = dict.mOptions.mAttributes.get(key);
            destination.write("," + key + "=" + value);
        }
        destination.write("\n");
        for (WordProperty wordProperty : wordPropertiesInDict) {
            destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
                    + PROBABILITY_TAG + "=" + wordProperty.getProbability()
                    + (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
            if (null != wordProperty.mShortcutTargets) {
                for (WeightedString target : wordProperty.mShortcutTargets) {
                    destination.write("  " + SHORTCUT_TAG + "=" + target.mWord + ","
                            + PROBABILITY_TAG + "=" + target.getProbability() + "\n");
                }
            }
            if (null != wordProperty.mBigrams) {
                for (WeightedString bigram : wordProperty.mBigrams) {
                    destination.write("  " + BIGRAM_TAG + "=" + bigram.mWord + ","
                            + PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
                }
            }
        destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
        for (final WordProperty wordProperty : wordPropertiesInDict) {
            destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
        }
        destination.close();
    }