Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da439fa4 authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Add utilities to read header values."

parents 02996dc5 22025c6a
Loading
Loading
Loading
Loading
+68 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#ifndef LATINIME_BINARY_FORMAT_H
#define LATINIME_BINARY_FORMAT_H

#include <cctype>
#include <limits>
#include <map>
#include "bloom_filter.h"
@@ -64,6 +65,9 @@ class BinaryFormat {
    static int detectFormat(const uint8_t *const dict);
    static unsigned int getHeaderSize(const uint8_t *const dict);
    static unsigned int getFlags(const uint8_t *const dict);
    // Looks up `key` among the {key,value} string pairs of the dictionary header and copies
    // the matching value into outValue as code points, terminated by 0. At most outValueSize
    // entries are written, terminator included, so longer values are truncated. If the key
    // is not found, an empty (0-terminated) value is written.
    static void readHeaderValue(const uint8_t *const dict, const char *const key,
            int *outValue, const int outValueSize);
    // Reads the header value stored under `key` and parses it as a decimal integer.
    // Returns S_INT_MIN if the value does not start with a decimal digit, which includes the
    // case where the key is absent from the header.
    static int readHeaderValueInt(const uint8_t *const dict, const char *const key);
    static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
    static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
    static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -167,6 +171,70 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
    }
}

// Looks up `key` among the {key,value} string pairs stored in the dictionary header and copies
// the matching value into outValue, one code point per element, followed by a 0 terminator.
//
// dict:         start of the binary dictionary.
// key:          0-terminated name of the header attribute to look up.
// outValue:     receives the value; always 0-terminated when outValueSize is positive.
// outValueSize: capacity of outValue in elements, terminator included. Values that do not fit
//               are truncated to outValueSize - 1 code points.
//
// If the key is not present, or the dictionary format has no header attributes, outValue
// receives an empty string.
inline void BinaryFormat::readHeaderValue(const uint8_t *const dict, const char *const key,
        int *outValue, const int outValueSize) {
    int outValueIndex = 0;
    // Only format 2 and above have header attributes as {key,value} string pairs. For prior
    // formats, we just return an empty string, as if the key wasn't found.
    if (2 <= detectFormat(dict)) {
        const int headerOptionsOffset = 4 /* magic number */
                + 2 /* dictionary version */ + 2 /* flags */;
        // Assemble the big-endian 32-bit size in unsigned arithmetic: shifting a promoted
        // (signed) byte left by 24 overflows int when the byte is >= 0x80.
        const int headerSize = static_cast<int>(
                (static_cast<uint32_t>(dict[headerOptionsOffset]) << 24)
                | (static_cast<uint32_t>(dict[headerOptionsOffset + 1]) << 16)
                | (static_cast<uint32_t>(dict[headerOptionsOffset + 2]) << 8)
                | static_cast<uint32_t>(dict[headerOptionsOffset + 3]));
        const int headerEnd = headerOptionsOffset + 4 + headerSize;
        int index = headerOptionsOffset + 4;
        while (index < headerEnd) {
            // Compare the stored key with the requested one, code point by code point.
            int keyIndex = 0;
            int codePoint = getCodePointAndForwardPointer(dict, &index);
            while (codePoint != NOT_A_CODE_POINT) {
                if (codePoint != key[keyIndex++]) {
                    break;
                }
                codePoint = getCodePointAndForwardPointer(dict, &index);
            }
            // A match requires both strings to end at the same place: the stored key hit its
            // terminator and the requested key has no characters left.
            if (codePoint == NOT_A_CODE_POINT && key[keyIndex] == 0) {
                // We found the key! Copy the value, leaving truncation and the 0 terminator
                // to the common termination code below.
                codePoint = getCodePointAndForwardPointer(dict, &index);
                while (codePoint != NOT_A_CODE_POINT
                        && outValueIndex < outValueSize) {
                    outValue[outValueIndex++] = codePoint;
                    codePoint = getCodePointAndForwardPointer(dict, &index);
                }
                // Finished copying. Break to go to the termination code.
                break;
            }
            // We didn't find the key, skip the remainder of it and its value
            while (codePoint != NOT_A_CODE_POINT) {
                codePoint = getCodePointAndForwardPointer(dict, &index);
            }
            codePoint = getCodePointAndForwardPointer(dict, &index);
            while (codePoint != NOT_A_CODE_POINT) {
                codePoint = getCodePointAndForwardPointer(dict, &index);
            }
        }
        // We couldn't find it - fall through and return an empty value.
    }
    // Put a terminator 0 if possible at all (always unless outValueSize is <= 0). When the
    // value filled the whole buffer, the last copied code point is overwritten.
    if (outValueIndex >= outValueSize) outValueIndex = outValueSize - 1;
    if (outValueIndex >= 0) outValue[outValueIndex] = 0;
    return;
}

inline int BinaryFormat::readHeaderValueInt(const uint8_t *const dict, const char *const key) {
    const int bufferSize = LARGEST_INT_DIGIT_COUNT;
    int intBuffer[bufferSize];
    char charBuffer[bufferSize];
    BinaryFormat::readHeaderValue(dict, key, intBuffer, bufferSize);
    for (int i = 0; i < bufferSize; ++i) {
        charBuffer[i] = intBuffer[i];
    }
    // If not a number, return S_INT_MIN
    if (!isdigit(charBuffer[0])) return S_INT_MIN;
    return atoi(charBuffer);
}

AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
        int *pos) {
    const int msb = dict[(*pos)++];
+6 −0
Original line number Diff line number Diff line
@@ -251,6 +251,12 @@ static inline void prof_out(void) {
// GCC warns about this.
#define S_INT_MIN (-2147483647 - 1) // -(1 << 31)
#endif
// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
// As such, this is the maximum number of characters that will be needed to represent a
// non-negative int as a string, including the terminator (no room is reserved for a minus
// sign); this is used as the size of a string buffer large enough to hold any value that is
// intended to fit in an integer, e.g. in the code that reads the header of the binary
// dictionary where a {key,value} string pair scheme is used.
#define LARGEST_INT_DIGIT_COUNT 11

// Define this to use mmap() for dictionary loading.  Undefine to use malloc() instead of mmap().
// We measured and compared performance of both, and found mmap() is fairly good in terms of