Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 68a52c12 authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Update FormatSpec.java"

parents 91f52e88 c7b63935
Loading
Loading
Loading
Loading
+10 −83
Original line number Original line Diff line number Diff line
@@ -36,9 +36,7 @@ public final class FormatSpec {
     * sion
     * sion
     *
     *
     * o |
     * o |
     * p | not used                                3 bits
     * p | not used, 2 bytes.
     * t | each unigram and bigram entry has a time stamp?
     * i |                                         1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG
     * o |
     * o |
     * nflags
     * nflags
     *
     *
@@ -48,7 +46,7 @@ public final class FormatSpec {
     * d |
     * d |
     * ersize
     * ersize
     *
     *
     *   | attributes list
     * attributes list
     *
     *
     * attributes list is:
     * attributes list is:
     * <key>   = | string of characters at the char format described below, with the terminator used
     * <key>   = | string of characters at the char format described below, with the terminator used
@@ -86,11 +84,10 @@ public final class FormatSpec {
     */
     */


    /* Node (FusionDictionary.PtNode) layout is as follows:
    /* Node (FusionDictionary.PtNode) layout is as follows:
     *   | is moved ?             2 bits, 11 = no          : FLAG_IS_NOT_MOVED
     *   | CHILDREN_ADDRESS_TYPE  2 bits, 11          : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
     *   |                          This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
     *   |                                10          : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES
     *   |                                01 = yes         : FLAG_IS_MOVED
     * f |                                01          : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE
     * f |                      the new address is stored in the same place as the parent address
     * l |                                00          : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS
     * l | is deleted?                    10 = yes         : FLAG_IS_DELETED
     * a | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
     * a | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
     * g | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
     * g | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
     * s | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
     * s | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
@@ -98,16 +95,6 @@ public final class FormatSpec {
     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
     *
     *
     * p |
     * a | parent address, 3byte
     * r | 1 byte = bbbbbbbb match
     * e |   case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
     * n |   otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
     * t | This address is relative to the head of the PtNode.
     * a | If the node doesn't have a parent, this field is set to 0.
     * d |
     * dress
     *
     * c | IF FLAG_HAS_MULTIPLE_CHARS
     * c | IF FLAG_HAS_MULTIPLE_CHARS
     * h |   char, char, char, char    n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
     * h |   char, char, char, char    n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
     * a |   end                       1 byte, = 0
     * a |   end                       1 byte, = 0
@@ -121,15 +108,10 @@ public final class FormatSpec {
     * q |
     * q |
     *
     *
     * c |
     * c |
     * h | children address, 3 bytes
     * h | children address, CHILDREN_ADDRESS_TYPE bytes
     * i | 1 byte = bbbbbbbb match
     * i | This address is relative to the position of this field.
     * l |   case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
     * l |
     * d |   otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
     * drenaddress
     * r | if this node doesn't have children, this field is set to 0.
     * e |   (see BinaryDictEncoderUtils#writeVariableSignedAddress)
     * n | This address is relative to the position of this field.
     * a |
     * ddress
     *
     *
     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
     *   | shortcut string list
     *   | shortcut string list
@@ -179,8 +161,6 @@ public final class FormatSpec {


    public static final int MAGIC_NUMBER = 0x9BC13AFE;
    public static final int MAGIC_NUMBER = 0x9BC13AFE;
    static final int NOT_A_VERSION_NUMBER = -1;
    static final int NOT_A_VERSION_NUMBER = -1;
    static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
    static final int FIRST_VERSION_WITH_TERMINAL_ID = 4;


    // These MUST have the same values as the relevant constants in format_utils.h.
    // These MUST have the same values as the relevant constants in format_utils.h.
    // From version 4 on, we use version * 100 + revision as a version number. That allows
    // From version 4 on, we use version * 100 + revision as a version number. That allows
@@ -202,9 +182,6 @@ public final class FormatSpec {
    // use it in the reading code.
    // use it in the reading code.
    static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
    static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;


    static final int PARENT_ADDRESS_SIZE = 3;
    static final int FORWARD_LINK_ADDRESS_SIZE = 3;

    // These flags are used only in the static dictionary.
    // These flags are used only in the static dictionary.
    static final int MASK_CHILDREN_ADDRESS_TYPE = 0xC0;
    static final int MASK_CHILDREN_ADDRESS_TYPE = 0xC0;
    static final int FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS = 0x00;
    static final int FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -220,13 +197,6 @@ public final class FormatSpec {
    static final int FLAG_IS_NOT_A_WORD = 0x02;
    static final int FLAG_IS_NOT_A_WORD = 0x02;
    static final int FLAG_IS_BLACKLISTED = 0x01;
    static final int FLAG_IS_BLACKLISTED = 0x01;


    // These flags are used only in the dynamic dictionary.
    static final int MASK_MOVE_AND_DELETE_FLAG = 0xC0;
    static final int FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE = 0x40;
    static final int FLAG_IS_MOVED = 0x00 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE;
    static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE;
    static final int FLAG_IS_DELETED = 0x80;

    static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80;
    static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80;
    static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40;
    static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40;
    static final int MASK_BIGRAM_ATTR_ADDRESS_TYPE = 0x30;
    static final int MASK_BIGRAM_ATTR_ADDRESS_TYPE = 0x30;
@@ -240,52 +210,12 @@ public final class FormatSpec {
    static final int PTNODE_TERMINATOR_SIZE = 1;
    static final int PTNODE_TERMINATOR_SIZE = 1;
    static final int PTNODE_FLAGS_SIZE = 1;
    static final int PTNODE_FLAGS_SIZE = 1;
    static final int PTNODE_FREQUENCY_SIZE = 1;
    static final int PTNODE_FREQUENCY_SIZE = 1;
    static final int PTNODE_TERMINAL_ID_SIZE = 4;
    static final int PTNODE_MAX_ADDRESS_SIZE = 3;
    static final int PTNODE_MAX_ADDRESS_SIZE = 3;
    static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1;
    static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1;
    static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
    static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
    static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
    static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;


    // These values are used only by version 4 or later. They MUST match the definitions in
    // ver4_dict_constants.cpp.
    static final String TRIE_FILE_EXTENSION = ".trie";
    public static final String HEADER_FILE_EXTENSION = ".header";
    static final String FREQ_FILE_EXTENSION = ".freq";
    // tat = Terminal Address Table
    static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
    static final String BIGRAM_FILE_EXTENSION = ".bigram";
    static final String SHORTCUT_FILE_EXTENSION = ".shortcut";
    static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup";
    static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
    static final int FLAGS_IN_FREQ_FILE_SIZE = 1;
    static final int FREQUENCY_AND_FLAGS_SIZE = 2;
    static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
    static final int UNIGRAM_TIMESTAMP_SIZE = 4;
    static final int UNIGRAM_COUNTER_SIZE = 1;
    static final int UNIGRAM_LEVEL_SIZE = 1;

    // With the English main dictionary as of October 2013, the size of bigram address table
    // is 345KB with the block size being 16.
    // This is 54% of that of full address table.
    static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
    static final int BIGRAM_CONTENT_COUNT = 1;
    static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
    static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
    static final int BIGRAM_TIMESTAMP_SIZE = 4;
    static final int BIGRAM_COUNTER_SIZE = 1;
    static final int BIGRAM_LEVEL_SIZE = 1;

    static final int SHORTCUT_CONTENT_COUNT = 1;
    static final int SHORTCUT_CONTENT_INDEX = 0;
    // With the English main dictionary as of October 2013, the size of shortcut address table is
    // 26KB with the block size being 64.
    // This is only 4.4% of that of full address table.
    static final int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
    static final String SHORTCUT_CONTENT_ID = "_shortcut";

    static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
    static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
    static final int NO_PARENT_ADDRESS = 0;
    static final int NO_FORWARD_LINK_ADDRESS = 0;
    static final int INVALID_CHARACTER = -1;
    static final int INVALID_CHARACTER = -1;


    static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127
    static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127
@@ -302,14 +232,11 @@ public final class FormatSpec {


    // This option needs to be the same numeric value as the one in binary_format.h.
    // This option needs to be the same numeric value as the one in binary_format.h.
    static final int NOT_VALID_WORD = -99;
    static final int NOT_VALID_WORD = -99;
    static final int SIGNED_CHILDREN_ADDRESS_SIZE = 3;


    static final int UINT8_MAX = 0xFF;
    static final int UINT8_MAX = 0xFF;
    static final int UINT16_MAX = 0xFFFF;
    static final int UINT16_MAX = 0xFFFF;
    static final int UINT24_MAX = 0xFFFFFF;
    static final int UINT24_MAX = 0xFFFFFF;
    static final int SINT24_MAX = 0x7FFFFF;
    static final int MSB8 = 0x80;
    static final int MSB8 = 0x80;
    static final int MSB24 = 0x800000;


    /**
    /**
     * Options about file format.
     * Options about file format.