Loading native/jni/NativeFileList.mk +2 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \ ver4_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ language_model_dict_content.cpp \ language_model_dict_content_global_counters.cpp \ shortcut_dict_content.cpp \ sparse_table_dict_content.cpp \ terminal_position_lookup_table.cpp) \ Loading Loading @@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/core/layout/normal_distribution_2d_test.cpp \ suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +8 −1 Original line number Diff line number Diff line Loading @@ -24,9 +24,11 @@ namespace latinime { const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0; const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1; bool LanguageModelDictContent::save(FILE *const file) const { return mTrieMap.save(file); return mTrieMap.save(file) && mGlobalCounters.save(file); } bool LanguageModelDictContent::runGC( Loading Loading @@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) { return false; } mGlobalCounters.incrementTotalCount(); mGlobalCounters.updateMaxValueOfCounters( updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount()); for (size_t i = 0; i < prevWordIds.size(); ++i) { if (prevWordIds[i] == NOT_A_WORD_ID) { break; Loading @@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) { return false; } mGlobalCounters.updateMaxValueOfCounters( updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount()); if (!originalNgramProbabilityEntry.isValid()) { entryCountersToUpdate->incrementNgramCount(i + 2); } Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +10 −4 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/word_attributes.h" #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" Loading Loading @@ -131,15 +132,17 @@ class LanguageModelDictContent { const ProbabilityEntry mProbabilityEntry; }; LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer, LanguageModelDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo) : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {} : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]), mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]), mHasHistoricalInfo(hasHistoricalInfo) {} explicit LanguageModelDictContent(const bool hasHistoricalInfo) : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {} : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {} bool isNearSizeLimit() const { return mTrieMap.isNearSizeLimit(); return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters(); } bool save(FILE *const file) const; Loading Loading @@ -218,8 +221,11 @@ class LanguageModelDictContent { // TODO: Remove static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; static const int TRIE_MAP_BUFFER_INDEX; static const int GLOBAL_COUNTERS_BUFFER_INDEX; TrieMap mTrieMap; LanguageModelDictContentGlobalCounters mGlobalCounters; const bool mHasHistoricalInfo; bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp 0 → 100644 +30 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" namespace latinime { const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD = (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64; const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30; const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4; const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0; const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1; } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h 0 → 100644 +97 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H #define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H #include <cstdio> #include "defines.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/byte_array_view.h" namespace latinime { class LanguageModelDictContentGlobalCounters { public: explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer) : mBuffer(buffer, 0 /* maxAdditionalBufferSize */), mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)), mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {} LanguageModelDictContentGlobalCounters() : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {} bool needsToHalveCounters() const { return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD || mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; } int getTotalCount() const { return mTotalCount; } bool save(FILE *const file) const { BufferWithExtendableBuffer bufferToWrite( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES, TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) { return false; } if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES, MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) { return false; } return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite); } void incrementTotalCount() { mTotalCount += 1; } void updateMaxValueOfCounters(const int count) { mMaxValueOfCounters = std::max(count, mMaxValueOfCounters); } void halveCounters() { mMaxValueOfCounters /= 2; mTotalCount /= 2; } private: DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters); const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD; const static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; const static int COUNTER_SIZE_IN_BYTES; const static int TOTAL_COUNT_INDEX; const static int MAX_VALUE_OF_COUNTERS_INDEX; BufferWithExtendableBuffer mBuffer; int mTotalCount; int mMaxValueOfCounters; static int readValue(const BufferWithExtendableBuffer &buffer, const int index) { const int pos = COUNTER_SIZE_IN_BYTES * index; if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) { return 0; } return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos); } }; } // namespace latinime #endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */ Loading
native/jni/NativeFileList.mk +2 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \ ver4_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ language_model_dict_content.cpp \ language_model_dict_content_global_counters.cpp \ shortcut_dict_content.cpp \ sparse_table_dict_content.cpp \ terminal_position_lookup_table.cpp) \ Loading Loading @@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/core/layout/normal_distribution_2d_test.cpp \ suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +8 −1 Original line number Diff line number Diff line Loading @@ -24,9 +24,11 @@ namespace latinime { const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0; const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1; bool LanguageModelDictContent::save(FILE *const file) const { return mTrieMap.save(file); return mTrieMap.save(file) && mGlobalCounters.save(file); } bool LanguageModelDictContent::runGC( Loading Loading @@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) { return false; } mGlobalCounters.incrementTotalCount(); mGlobalCounters.updateMaxValueOfCounters( updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount()); for (size_t i = 0; i < prevWordIds.size(); ++i) { if (prevWordIds[i] == NOT_A_WORD_ID) { break; Loading @@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) { return false; } mGlobalCounters.updateMaxValueOfCounters( updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount()); if (!originalNgramProbabilityEntry.isValid()) { entryCountersToUpdate->incrementNgramCount(i + 2); } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +10 −4 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/word_attributes.h" #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" Loading Loading @@ -131,15 +132,17 @@ class LanguageModelDictContent { const ProbabilityEntry mProbabilityEntry; }; LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer, LanguageModelDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo) : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {} : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]), mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]), mHasHistoricalInfo(hasHistoricalInfo) {} explicit LanguageModelDictContent(const bool hasHistoricalInfo) : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {} : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {} bool isNearSizeLimit() const { return mTrieMap.isNearSizeLimit(); return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters(); } bool save(FILE *const file) const; Loading Loading @@ -218,8 +221,11 @@ class LanguageModelDictContent { // TODO: Remove static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; static const int TRIE_MAP_BUFFER_INDEX; static const int GLOBAL_COUNTERS_BUFFER_INDEX; TrieMap mTrieMap; LanguageModelDictContentGlobalCounters mGlobalCounters; const bool mHasHistoricalInfo; bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp 0 → 100644 +30 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" namespace latinime { const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD = (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64; const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30; const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4; const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0; const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1; } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h 0 → 100644 +97 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H #define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H #include <cstdio> #include "defines.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/byte_array_view.h" namespace latinime { class LanguageModelDictContentGlobalCounters { public: explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer) : mBuffer(buffer, 0 /* maxAdditionalBufferSize */), mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)), mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {} LanguageModelDictContentGlobalCounters() : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {} bool needsToHalveCounters() const { return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD || mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; } int getTotalCount() const { return mTotalCount; } bool save(FILE *const file) const { BufferWithExtendableBuffer bufferToWrite( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES, TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) { return false; } if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES, MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) { return false; } return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite); } void incrementTotalCount() { mTotalCount += 1; } void updateMaxValueOfCounters(const int count) { mMaxValueOfCounters = std::max(count, mMaxValueOfCounters); } void halveCounters() { mMaxValueOfCounters /= 2; mTotalCount /= 2; } private: DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters); const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD; const static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; const static int COUNTER_SIZE_IN_BYTES; const static int TOTAL_COUNT_INDEX; const static int MAX_VALUE_OF_COUNTERS_INDEX; BufferWithExtendableBuffer mBuffer; int mTotalCount; int mMaxValueOfCounters; static int readValue(const BufferWithExtendableBuffer &buffer, const int index) { const int pos = COUNTER_SIZE_IN_BYTES * index; if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) { return 0; } return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos); } }; } // namespace latinime #endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */