Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7eba0198 authored by Jean Chalard's avatar Jean Chalard
Browse files

Dump binary dictionary information when opening

Bug: 9459517
Change-Id: I122583e734936ae0284e1c7500c6c9242bc7973b
parent edf58425
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
        AKLOGE("DICT: dictionary format is unknown, bad magic number");
        releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
    } else {
        dictionary = new Dictionary(
                dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap);
        dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
                updatableMmap);
    }
    PROF_END(66);
    PROF_CLOSE;
+51 −25
Original line number Diff line number Diff line
@@ -35,46 +35,74 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))

#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h>
#ifndef LOG_TAG
#define LOG_TAG "LatinIME: "
#endif // LOG_TAG
#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)

#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define INTS_TO_CHARS(input, length, output) do { \
        intArrayToCharArray(input, length, output); } while (0)

// Encodes an array of code points into dest using the UTF-8 bit layout, including the legacy
// five- and six-byte forms for values above 0x1FFFFF.
//
// source, sourceSize: the input code points. Conversion stops at the first 0 entry or after
//         sourceSize entries, whichever comes first. Negative entries are skipped.
// dest, destSize: the output buffer and its capacity in bytes. When destSize > 0 the output is
//         always terminated with a 0 char. A code point whose encoding does not fit entirely
//         is not emitted and ends the conversion.
// Returns the number of bytes written, not counting the terminating 0.
// TODO: Reject surrogates and values above 0x10FFFF if strictly valid UTF-8 is ever required.
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
        char *dest, const int destSize) {
    // Without room for even the terminator there is nothing we can safely write.
    if (destSize <= 0) return 0;
    // We want to always terminate with a 0 char, so stop one short of the length to make
    // sure there is room.
    const int destLimit = destSize - 1;
    int si = 0;
    int di = 0;
    while (si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        // Number of continuation bytes that follow the lead byte, and the lead byte marker.
        // The bounds are inclusive so that 0x7F, 0x7FF and 0xFFFF get their shortest form.
        int continuationCount;
        int leadMarker;
        if (codePoint <= 0x7F) { // One byte
            continuationCount = 0;
            leadMarker = 0x00;
        } else if (codePoint <= 0x7FF) { // Two bytes
            continuationCount = 1;
            leadMarker = 0xC0;
        } else if (codePoint <= 0xFFFF) { // Three bytes
            continuationCount = 2;
            leadMarker = 0xE0;
        } else if (codePoint <= 0x1FFFFF) { // Four bytes
            continuationCount = 3;
            leadMarker = 0xF0;
        } else if (codePoint <= 0x3FFFFFF) { // Five bytes
            continuationCount = 4;
            leadMarker = 0xF8;
        } else { // Six bytes
            continuationCount = 5;
            leadMarker = 0xFC;
        }
        // Never emit a truncated sequence: the whole encoding must fit before the terminator.
        if (di + continuationCount >= destLimit) break;
        dest[di++] = static_cast<char>(leadMarker + (codePoint >> (6 * continuationCount)));
        // Every continuation byte carries six payload bits behind the 0x80 marker.
        for (int shift = 6 * (continuationCount - 1); shift >= 0; shift -= 6) {
            dest[di++] = static_cast<char>(0x80 + ((codePoint >> shift) & 0x3F));
        }
    }
    dest[di] = 0;
    return di;
}

#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h>
#ifndef LOG_TAG
#define LOG_TAG "LatinIME: "
#endif // LOG_TAG
#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)

#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define INTS_TO_CHARS(input, length, output, outlength) do { \
        intArrayToCharArray(input, length, output, outlength); } while (0)

static inline void dumpWordInfo(const int *word, const int length, const int rank,
        const int probability) {
    static char charBuf[50];
    const int N = intArrayToCharArray(word, length, charBuf);
    const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
    if (N > 1) {
        AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
    }
@@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) {

static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
    static char charBuf[50];
    const int N = intArrayToCharArray(word, length, charBuf);
    const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
    if (N > 1) {
        AKLOGI("[ %s ]", charBuf);
    }
@@ -304,8 +332,6 @@ static inline void prof_out(void) {
// Returns a reference to the smaller of the two arguments. When a does not compare less than b
// (including when they are equivalent), b is returned.
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) {
    if (a < b) {
        return a;
    }
    return b;
}
// Returns a reference to the larger of the two arguments. When a does not compare greater than
// b (including when they are equivalent), b is returned.
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) {
    if (a > b) {
        return a;
    }
    return b;
}

#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))

// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
+4 −3
Original line number Diff line number Diff line
@@ -28,15 +28,16 @@
#if DEBUG_DICT
// Debug helper: converts the output word with INTS_TO_CHARS into a local buffer, bounded by
// that buffer's size, then logs it together with the enclosing function name, the node code
// point, the input size and indices, and the normalized compound distance.
#define LOGI_SHOW_ADD_COST_PROP \
        do { char charBuf[50]; \
        INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
        AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
                __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
                getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \
        do { char charBuf[50]; char prevWordCharBuf[50]; \
        INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
        INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
        INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
                mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
                mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
                NELEMS(prevWordCharBuf)); \
        AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
                getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
                getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
+14 −0
Original line number Diff line number Diff line
@@ -53,6 +53,20 @@ class BinaryDictionaryHeader {
        return mMultiWordCostMultiplier;
    }

    // Fills outValue with the header value stored under key, as read by
    // BinaryDictionaryHeaderReadingUtils::readHeaderValue. If that read reports failure,
    // outValue is set to "?" followed by a 0 terminator instead.
    // outValueSize is the capacity of outValue in elements: with a capacity of 1 only the 0
    // terminator is written, and with a capacity of 0 or less outValue is left untouched.
    AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
            int *outValue, int outValueSize) const {
        if (outValueSize > 1) {
            const bool valueFound = BinaryDictionaryHeaderReadingUtils::readHeaderValue(
                    mBinaryDictionaryInfo, key, outValue, outValueSize);
            if (!valueFound) {
                outValue[0] = '?';
                outValue[1] = '\0';
            }
        } else if (outValueSize == 1) {
            // Only room for the terminator.
            outValue[0] = '\0';
        }
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);

+3 −1
Original line number Diff line number Diff line
@@ -82,8 +82,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
        if(ByteArrayUtils::compareStringInBufferWithCharArray(
                binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
            // The key was found.
            ByteArrayUtils::readStringAndAdvancePosition(
            const int length = ByteArrayUtils::readStringAndAdvancePosition(
                    binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
            // Add a 0 terminator to the string.
            outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
            return true;
        }
        ByteArrayUtils::advancePositionToBehindString(
Loading