Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit afe67611 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Add a class to have global counters for LanguageModelDictContent."

parents ca6e5dfe 6b0561f9
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \
        ver4_pt_node_array_reader.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
        language_model_dict_content.cpp \
        language_model_dict_content_global_counters.cpp \
        shortcut_dict_content.cpp \
        sparse_table_dict_content.cpp \
        terminal_position_lookup_table.cpp) \
@@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \
    suggest/core/layout/normal_distribution_2d_test.cpp \
    suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \
    suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
    suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \
    suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
    suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \
    suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
+8 −1
Original line number Diff line number Diff line
@@ -24,9 +24,11 @@
namespace latinime {

const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0;
const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1;

bool LanguageModelDictContent::save(FILE *const file) const {
    return mTrieMap.save(file);
    return mTrieMap.save(file) && mGlobalCounters.save(file);
}

bool LanguageModelDictContent::runGC(
@@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
    if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
        return false;
    }
    mGlobalCounters.incrementTotalCount();
    mGlobalCounters.updateMaxValueOfCounters(
            updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount());
    for (size_t i = 0; i < prevWordIds.size(); ++i) {
        if (prevWordIds[i] == NOT_A_WORD_ID) {
            break;
@@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
        if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
            return false;
        }
        // Track the count of the n-gram entry that was just stored. The unigram entry's count
        // has already been folded into the maximum before this loop.
        mGlobalCounters.updateMaxValueOfCounters(
                updatedNgramProbabilityEntry.getHistoricalInfo()->getCount());
        if (!originalNgramProbabilityEntry.isValid()) {
            entryCountersToUpdate->incrementNgramCount(i + 2);
        }
+10 −4
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@

#include "defines.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -131,15 +132,17 @@ class LanguageModelDictContent {
        const ProbabilityEntry mProbabilityEntry;
    };

    LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
    LanguageModelDictContent(const ReadWriteByteArrayView *const buffers,
            const bool hasHistoricalInfo)
            : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
            : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]),
              mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]),
              mHasHistoricalInfo(hasHistoricalInfo) {}

    explicit LanguageModelDictContent(const bool hasHistoricalInfo)
            : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
            : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {}

    bool isNearSizeLimit() const {
        return mTrieMap.isNearSizeLimit();
        return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters();
    }

    bool save(FILE *const file) const;
@@ -218,8 +221,11 @@ class LanguageModelDictContent {

    // TODO: Remove
    static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
    static const int TRIE_MAP_BUFFER_INDEX;
    static const int GLOBAL_COUNTERS_BUFFER_INDEX;

    TrieMap mTrieMap;
    LanguageModelDictContentGlobalCounters mGlobalCounters;
    const bool mHasHistoricalInfo;

    bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+30 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"

#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"

namespace latinime {

// Value of the largest per-entry counter at or above which needsToHalveCounters() reports true:
// 64 below 2^(WORD_COUNT_FIELD_SIZE * CHAR_BIT).
// NOTE(review): presumably WORD_COUNT_FIELD_SIZE is a size in bytes and small enough that the
// shift stays within the width of int -- confirm against Ver4DictConstants.
const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD =
        (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64;
// Total count at or above which needsToHalveCounters() reports true (2^30).
const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30;
// Width, in bytes, of each counter as read from and written to the buffer.
const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4;
// Slot indices of the stored counters; the byte offset of a slot is
// index * COUNTER_SIZE_IN_BYTES.
const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0;
const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1;

} // namespace latinime
+97 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H

#include <algorithm>
#include <cstdio>

#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "utils/byte_array_view.h"

namespace latinime {

/**
 * Two global counters kept next to the language model dictionary content: a running total
 * count and the largest count value reported through updateMaxValueOfCounters().
 *
 * The counters are stored as consecutive COUNTER_SIZE_IN_BYTES-wide unsigned integers, the
 * total count at slot TOTAL_COUNT_INDEX and the maximum at slot MAX_VALUE_OF_COUNTERS_INDEX.
 * They are read once at construction and written back in the same layout by save().
 */
class LanguageModelDictContentGlobalCounters {
 public:
    /**
     * Loads both counters from the given buffer.
     *
     * A counter whose slot does not fit inside the buffer is initialized to 0.
     *
     * @param buffer view over the bytes holding the stored counters.
     */
    explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer)
            : mBuffer(buffer, 0 /* maxAdditionalBufferSize */),
              mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)),
              mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {}

    /** Creates counters that both start at 0, backed by an empty buffer. */
    LanguageModelDictContentGlobalCounters()
            : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {}

    /**
     * @return true when the maximum counter value or the total count has reached its
     *         near-limit threshold.
     */
    bool needsToHalveCounters() const {
        return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD
                || mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
    }

    /** @return the current total count. */
    int getTotalCount() const {
        return mTotalCount;
    }

    /**
     * Serializes both counters into a fresh buffer and appends it to the tail of the file.
     *
     * @param file destination file.
     * @return false if either counter cannot be written to the temporary buffer; otherwise the
     *         result of DictFileWritingUtils::writeBufferToFileTail().
     */
    bool save(FILE *const file) const {
        BufferWithExtendableBuffer bufferToWrite(
                BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
        if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES,
                TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) {
            return false;
        }
        if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES,
                MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) {
            return false;
        }
        return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite);
    }

    /** Adds one to the total count. */
    void incrementTotalCount() {
        mTotalCount += 1;
    }

    /**
     * Raises the recorded maximum to count if count is larger; otherwise leaves it unchanged.
     *
     * @param count a counter value to compare against the recorded maximum.
     */
    void updateMaxValueOfCounters(const int count) {
        mMaxValueOfCounters = std::max(count, mMaxValueOfCounters);
    }

    /** Halves both the recorded maximum and the total count (integer division). */
    void halveCounters() {
        mMaxValueOfCounters /= 2;
        mTotalCount /= 2;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters);

    static const int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD;
    static const int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
    static const int COUNTER_SIZE_IN_BYTES;
    static const int TOTAL_COUNT_INDEX;
    static const int MAX_VALUE_OF_COUNTERS_INDEX;

    // Wraps the buffer the counters were loaded from. It is only read during construction;
    // save() serializes from the in-memory values below.
    BufferWithExtendableBuffer mBuffer;
    int mTotalCount;
    int mMaxValueOfCounters;

    /**
     * Reads the counter stored at the given slot.
     *
     * @param buffer buffer holding the stored counters.
     * @param index slot index; the byte offset is COUNTER_SIZE_IN_BYTES * index.
     * @return the stored value, or 0 when the slot extends past the buffer's tail position.
     */
    static int readValue(const BufferWithExtendableBuffer &buffer, const int index) {
        const int pos = COUNTER_SIZE_IN_BYTES * index;
        if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) {
            return 0;
        }
        return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos);
    }
};
} // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */
Loading