Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1085fef8 authored by Keisuke Kuroyanagi
Browse files

Change entry count limit.

Unigram 10K, Bigram 30K, Trigram 30K.

Change-Id: Ibd19c6a2b618499df1c70000bad7b47498187f0a
parent 101cdca7
Loading
Loading
Loading
Loading
+0 −7
Original line number Diff line number Diff line
@@ -64,9 +64,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {

    private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100;

    private static final int DEFAULT_MAX_UNIGRAM_COUNT = 10000;
    private static final int DEFAULT_MAX_BIGRAM_COUNT = 10000;

    /**
     * The maximum length of a word in this dictionary.
     */
@@ -225,10 +222,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
        attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString());
        attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY,
                String.valueOf(DEFAULT_MAX_UNIGRAM_COUNT));
        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY,
                String.valueOf(DEFAULT_MAX_BIGRAM_COUNT));
        return attributeMap;
    }

+3 −2
Original line number Diff line number Diff line
@@ -40,8 +40,9 @@ public final class DictionaryHeader {
    public static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
    public static final String FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
            "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
    public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
    public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
    public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT";
    public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT";
    public static final String MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT";
    public static final String ATTRIBUTE_VALUE_TRUE = "1";
    public static final String CODE_POINT_TABLE_KEY = "codePointTable";

+5 −3
Original line number Diff line number Diff line
@@ -38,15 +38,17 @@ const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
        "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";

const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT";
const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT";
const char *const HeaderPolicy::MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT";

const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;

const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 30000;
const int HeaderPolicy::DEFAULT_MAX_TRIGRAM_COUNT = 30000;

// Used for logging. Question mark is used to indicate that the key is not found.
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+2 −0
Original line number Diff line number Diff line
@@ -253,11 +253,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
    static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
    static const char *const MAX_UNIGRAM_COUNT_KEY;
    static const char *const MAX_BIGRAM_COUNT_KEY;
    static const char *const MAX_TRIGRAM_COUNT_KEY;
    static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
    static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
    static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
    static const int DEFAULT_MAX_UNIGRAM_COUNT;
    static const int DEFAULT_MAX_BIGRAM_COUNT;
    static const int DEFAULT_MAX_TRIGRAM_COUNT;

    const FormatUtils::FORMAT_VERSION mDictFormatVersion;
    const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
+28 −10
Original line number Diff line number Diff line
@@ -39,7 +39,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;

@@ -136,11 +135,18 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
    private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();

    /**
     * Creates an empty dictionary file for the given format version, starting from an
     * attribute map that contains no caller-supplied entries.
     *
     * @param formatVersion the dictionary format version to create
     * @return the result of {@link #createEmptyDictionaryWithAttributeMapAndGetFile} invoked
     *         with a fresh, empty attribute map
     */
    private File createEmptyDictionaryAndGetFile(final int formatVersion) {
        final HashMap<String, String> emptyAttributes = new HashMap<>();
        return createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, emptyAttributes);
    }

    private File createEmptyDictionaryWithAttributeMapAndGetFile(final int formatVersion,
            final HashMap<String, String> attributeMap) {
        if (formatVersion == FormatSpec.VERSION4
                || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
                || formatVersion == FormatSpec.VERSION4_DEV) {
            try {
                final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion);
                final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
                        attributeMap);
                mDictFilesToBeDeleted.add(dictFile);
                return dictFile;
            } catch (final IOException e) {
@@ -152,12 +158,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
        return null;
    }

    private File createEmptyVer4DictionaryAndGetFile(final int formatVersion)
    private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
            final HashMap<String, String> attributeMap)
            throws IOException {
        final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
                getContext().getCacheDir());
        FileUtils.deleteRecursively(file);
        Map<String, String> attributeMap = new HashMap<>();
        attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, DICTIONARY_ID);
        attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
@@ -388,7 +394,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
        }

        final int maxUnigramCount = Integer.parseInt(
                binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
                binaryDictionary.getPropertyForGettingStats(
                        BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
        for (int i = 0; i < unigramTypedCount; i++) {
            final String word = words.get(random.nextInt(words.size()));
            onInputWord(binaryDictionary, word, true /* isValidWord */);
@@ -476,6 +483,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
    }

    private void testAddManyBigramsToDecayingDict(final int formatVersion) {
        final int maxUnigramCount = 5000;
        final int maxBigramCount = 10000;
        final HashMap<String, String> attributeMap = new HashMap<>();
        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));

        final int unigramCount = 5000;
        final int bigramCount = 30000;
        final int bigramTypedCount = 100000;
@@ -484,7 +497,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
        final Random random = new Random(seed);

        setCurrentTimeForTestMode(mCurrentTime);
        final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
        final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
                attributeMap);
        final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);

        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
@@ -507,9 +521,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
            bigrams.add(bigram);
        }

        final int maxBigramCount = Integer.parseInt(
                binaryDictionary.getPropertyForGettingStats(
                        BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
        for (int i = 0; i < bigramTypedCount; ++i) {
            final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
            onInputWord(binaryDictionary, bigram.first, true /* isValidWord */);
@@ -546,6 +557,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
    }

    private void testOverflowBigrams(final int formatVersion) {
        final int maxUnigramCount = 5000;
        final int maxBigramCount = 10000;
        final HashMap<String, String> attributeMap = new HashMap<>();
        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));

        final int bigramCount = 20000;
        final int unigramCount = 1000;
        final int unigramTypedCount = 20;
@@ -556,7 +573,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        setCurrentTimeForTestMode(mCurrentTime);
        final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
        final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
                attributeMap);
        final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

Loading