Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fd018c15 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Use CombinedFormatUtils to convert dict elements to strings."

parents 337dce80 b24de426
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;

import java.util.Arrays;

@@ -57,8 +57,7 @@ public final class ProbabilityInfo {

    @Override
    public String toString() {
        return "f=" + mProbability + (hasHistoricalInfo() ?
                ",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
        return CombinedFormatUtils.formatProbabilityInfo(this);
    }

    @Override
+4 −29
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
@@ -52,8 +53,8 @@ public final class WordProperty implements Comparable<WordProperty> {
        mBigrams = bigrams;
        mIsNotAWord = isNotAWord;
        mIsBlacklistEntry = isBlacklistEntry;
        mHasBigrams = !bigrams.isEmpty();
        mHasShortcuts = !shortcutTargets.isEmpty();
        mHasBigrams = bigrams != null && !bigrams.isEmpty();
        mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
    }

    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
@@ -158,32 +159,6 @@ public final class WordProperty implements Comparable<WordProperty> {

    @Override
    public String toString() {
        // TODO: Move this logic to CombinedInputOutput.
        final StringBuffer builder = new StringBuffer();
        builder.append(" word=" + mWord);
        builder.append(",");
        builder.append(mProbabilityInfo.toString());
        if (mIsNotAWord) {
            builder.append(",");
            builder.append("not_a_word=true");
        }
        if (mIsBlacklistEntry) {
            builder.append(",");
            builder.append("blacklisted=true");
        }
        builder.append("\n");
        for (int i = 0; i < mBigrams.size(); i++) {
            builder.append("  bigram=" + mBigrams.get(i).mWord);
            builder.append(",");
            builder.append(mBigrams.get(i).mProbabilityInfo.toString());
            builder.append("\n");
        }
        for (int i = 0; i < mShortcutTargets.size(); i++) {
            builder.append("  shortcut=" + mShortcutTargets.get(i).mWord);
            builder.append(",");
            builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
            builder.append("\n");
        }
        return builder.toString();
        return CombinedFormatUtils.formatWordProperty(this);
    }
}
+99 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.utils;

import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.util.HashMap;

public class CombinedFormatUtils {
    public static final String DICTIONARY_TAG = "dictionary";
    public static final String BIGRAM_TAG = "bigram";
    public static final String SHORTCUT_TAG = "shortcut";
    public static final String PROBABILITY_TAG = "f";
    public static final String HISTORICAL_INFO_TAG = "historicalInfo";
    public static final String HISTORICAL_INFO_SEPARATOR = ":";
    public static final String WORD_TAG = "word";
    public static final String NOT_A_WORD_TAG = "not_a_word";
    public static final String BLACKLISTED_TAG = "blacklisted";

    public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
        final StringBuilder builder = new StringBuilder();
        builder.append(DICTIONARY_TAG + "=");
        if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) {
            builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY));
        }
        for (final String key : attributeMap.keySet()) {
            if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) {
                continue;
            }
            final String value = attributeMap.get(key);
            builder.append("," + key + "=" + value);
        }
        builder.append("\n");
        return builder.toString();
    }

    public static String formatWordProperty(final WordProperty wordProperty) {
        final StringBuilder builder = new StringBuilder();
        builder.append(" " + WORD_TAG + "=" + wordProperty.mWord);
        builder.append(",");
        builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
        if (wordProperty.mIsNotAWord) {
            builder.append("," + NOT_A_WORD_TAG + "=true");
        }
        if (wordProperty.mIsBlacklistEntry) {
            builder.append("," + BLACKLISTED_TAG + "=true");
        }
        builder.append("\n");
        if (wordProperty.mShortcutTargets != null) {
            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                builder.append("  " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
                builder.append(",");
                builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo));
                builder.append("\n");
            }
        }
        if (wordProperty.mBigrams != null) {
            for (final WeightedString bigram : wordProperty.mBigrams) {
                builder.append("  " + BIGRAM_TAG + "=" + bigram.mWord);
                builder.append(",");
                builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
                builder.append("\n");
            }
        }
        return builder.toString();
    }

    public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) {
        final StringBuilder builder = new StringBuilder();
        builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability);
        if (probabilityInfo.hasHistoricalInfo()) {
            builder.append(",");
            builder.append(HISTORICAL_INFO_TAG + "=");
            builder.append(probabilityInfo.mTimestamp);
            builder.append(HISTORICAL_INFO_SEPARATOR);
            builder.append(probabilityInfo.mLevel);
            builder.append(HISTORICAL_INFO_SEPARATOR);
            builder.append(probabilityInfo.mCount);
        }
        return builder.toString();
    }
}
+1 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
        $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
+34 −48
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;

import java.io.BufferedReader;
import java.io.File;
@@ -41,16 +42,10 @@ import java.util.TreeSet;
 * All functions in this class are static.
 */
public class CombinedInputOutput {

    private static final String DICTIONARY_TAG = "dictionary";
    private static final String BIGRAM_TAG = "bigram";
    private static final String SHORTCUT_TAG = "shortcut";
    private static final String PROBABILITY_TAG = "f";
    private static final String WORD_TAG = "word";
    private static final String NOT_A_WORD_TAG = "not_a_word";
    private static final String WHITELIST_TAG = "whitelist";
    private static final String OPTIONS_TAG = "options";
    private static final String COMMENT_LINE_STARTER = "#";
    private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;

    /**
     * Basic test to find out whether the file is in the combined format or not.
@@ -68,7 +63,8 @@ public class CombinedInputOutput {
            while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
                firstLine = reader.readLine();
            }
            return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
            return firstLine.matches(
                    "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
        } catch (FileNotFoundException e) {
            return false;
        } catch (IOException e) {
@@ -123,7 +119,7 @@ public class CombinedInputOutput {
        while (null != (line = reader.readLine())) {
            if (line.startsWith(COMMENT_LINE_STARTER)) continue;
            final String args[] = line.trim().split(",");
            if (args[0].matches(WORD_TAG + "=.*")) {
            if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
                if (null != word) {
                    dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
                    for (WeightedString s : bigrams) {
@@ -136,23 +132,30 @@ public class CombinedInputOutput {
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (WORD_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
                        word = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        freq = Integer.parseInt(params[1]);
                    } else if (NOT_A_WORD_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
                        final String[] historicalInfoParams =
                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
                            throw new RuntimeException("Wrong format (historical info) : " + line);
                        }
                        // TODO: Use parsed historical info.
                    } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
                        isNotAWord = "true".equals(params[1]);
                    }
                }
            } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
            } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
                String shortcut = null;
                int shortcutFreq = 0;
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (SHORTCUT_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
                        shortcut = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        shortcutFreq = WHITELIST_TAG.equals(params[1])
                                ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
                                : Integer.parseInt(params[1]);
@@ -163,16 +166,23 @@ public class CombinedInputOutput {
                } else {
                    throw new RuntimeException("Wrong format : " + line);
                }
            } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
            } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
                String secondWordOfBigram = null;
                int bigramFreq = 0;
                for (String param : args) {
                    final String params[] = param.split("=", 2);
                    if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
                    if (BIGRAM_TAG.equals(params[0])) {
                    if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
                        secondWordOfBigram = params[1];
                    } else if (PROBABILITY_TAG.equals(params[0])) {
                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
                        bigramFreq = Integer.parseInt(params[1]);
                    }  else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
                        final String[] historicalInfoParams =
                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
                            throw new RuntimeException("Wrong format (historical info) : " + line);
                        }
                        // TODO: Use parsed historical info.
                    }
                }
                if (null != secondWordOfBigram) {
@@ -198,40 +208,16 @@ public class CombinedInputOutput {
     * @param destination a destination stream to write to.
     * @param dict the dictionary to write.
     */
    public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
            throws IOException {
    public static void writeDictionaryCombined(
            final Writer destination, final FusionDictionary dict) throws IOException {
        final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
        for (WordProperty wordProperty: dict) {
        for (final WordProperty wordProperty : dict) {
            // This for ordering by frequency, then by asciibetic order
            wordPropertiesInDict.add(wordProperty);
        }
        final HashMap<String, String> options = dict.mOptions.mAttributes;
        destination.write(DICTIONARY_TAG + "=");
        if (options.containsKey(DICTIONARY_TAG)) {
            destination.write(options.get(DICTIONARY_TAG));
            options.remove(DICTIONARY_TAG);
        }
        for (final String key : dict.mOptions.mAttributes.keySet()) {
            final String value = dict.mOptions.mAttributes.get(key);
            destination.write("," + key + "=" + value);
        }
        destination.write("\n");
        for (WordProperty wordProperty : wordPropertiesInDict) {
            destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
                    + PROBABILITY_TAG + "=" + wordProperty.getProbability()
                    + (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
            if (null != wordProperty.mShortcutTargets) {
                for (WeightedString target : wordProperty.mShortcutTargets) {
                    destination.write("  " + SHORTCUT_TAG + "=" + target.mWord + ","
                            + PROBABILITY_TAG + "=" + target.getProbability() + "\n");
                }
            }
            if (null != wordProperty.mBigrams) {
                for (WeightedString bigram : wordProperty.mBigrams) {
                    destination.write("  " + BIGRAM_TAG + "=" + bigram.mWord + ","
                            + PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
                }
            }
        destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
        for (final WordProperty wordProperty : wordPropertiesInDict) {
            destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
        }
        destination.close();
    }