Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c5e6634 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Groundwork for implementing GC."

parents ff493744 b7e8a9ab
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -98,6 +98,13 @@ public:
           flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
           *outOffsetFieldSize = 1;
       }

       // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
       // writing.
       // TODO: Remove following 2 lines and optimize memory space.
       flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
       *outOffsetFieldSize = 3;

       *outBigramFlags = flags;
       *outOffset = absOffest;
       return true;
+4 −4
Original line number Diff line number Diff line
@@ -54,8 +54,8 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
    }
}

bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
        int *outBigramsCount) {
bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
        int *const fromPos, int *const toPos, int *const outBigramsCount) const {
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
    if (usesAdditionalBuffer) {
        *fromPos -= mBuffer->getOriginalBufferSize();
@@ -86,10 +86,10 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
            continue;
        }
        // Write bigram entry. Target buffer is always the additional buffer.
        if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
        if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
            return false;
        }
        if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
        if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
                toPos)) {
            return false;
        }
+5 −4
Original line number Diff line number Diff line
@@ -44,10 +44,11 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {

    void skipAllBigrams(int *const pos) const;

    // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
    // positions after bigram lists. This method skips invalid bigram entries and write the valid
    // bigram entry count to outBigramsCount.
    bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
    // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
    // bufferToWrite and advance these positions after bigram lists. This method skips invalid
    // bigram entries and writes the valid bigram entry count to outBigramsCount.
    bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
            int *const toPos, int *const outBigramsCount) const;

    bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);

+102 −53
Original line number Diff line number Diff line
@@ -97,8 +97,8 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
        return false;
    }
    int writingPos = newNodePos;
    // Write a new PtNode using original PtNode's info to the tail of the dictionary.
    if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
    // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
    if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
            mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
            &writingPos)) {
        return false;
@@ -143,38 +143,20 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
    if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
        return;
    }
    const int tmpFileNameBufSize = strlen(fileName)
            + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
    char tmpFileName[tmpFileNameBufSize];
    snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    FILE *const file = fopen(tmpFileName, "wb");
    if (!file) {
        return;
    flushAllToFile(fileName, &headerBuffer, mBuffer);
}
    // Write header.
    if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */),
            headerBuffer.getTailPosition(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);
        return;
    }
    // Write data in original buffer.
    if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
            mBuffer->getOriginalBufferSize(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);

void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
        const char *const fileName, const HeaderPolicy *const headerPolicy) {
    BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
        return;
    }
    // Write data in additional buffer.
    if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
            mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);
    BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
        return;
    }
    fclose(file);
    rename(tmpFileName, fileName);
    flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
}

bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@@ -232,7 +214,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
}

// Write new PtNode at writingPos.
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted,
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
        BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
        const bool isNotAWord, const int parentPos, const int *const codePoints,
        const int codePointCount, const int probability, const int childrenPos,
        const int originalBigramListPos, const int originalShortcutListPos,
@@ -240,38 +223,39 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
    const int nodePos = *writingPos;
    // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
    // PtNode writing.
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
            writingPos)) {
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
            0 /* nodeFlags */, writingPos)) {
        return false;
    }
    // Calculate a parent offset and write the offset.
    const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
    if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBuffer,
    if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite,
            parentOffset, writingPos)) {
        return false;
    }
    // Write code points
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
            codePointCount, writingPos)) {
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
            codePoints, codePointCount, writingPos)) {
        return false;
    }
    // Write probability when the probability is a valid probability, which means this node is
    // terminal.
    if (probability != NOT_A_PROBABILITY) {
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
                probability, writingPos)) {
            return false;
        }
    }
    // Write children position
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
            childrenPos, writingPos)) {
        return false;
    }
    // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
    if (originalShortcutListPos != NOT_A_DICT_POS) {
        int fromPos = originalShortcutListPos;
        if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(&fromPos, writingPos)) {
        if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
                writingPos)) {
            return false;
        }
    }
@@ -279,7 +263,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
    int bigramCount = 0;
    if (originalBigramListPos != NOT_A_DICT_POS) {
        int fromPos = originalBigramListPos;
        if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
        if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
            return false;
        }
    }
@@ -291,27 +275,29 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
                    bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
                    CHILDREN_POSITION_FIELD_SIZE);
    int flagsFieldPos = nodePos;
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
            &flagsFieldPos)) {
        return false;
    }
    return true;
}

bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos,
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
        BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */,
            parentPos, codePoints, codePointCount, probability,
    return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
            false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
            NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
            NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
}

bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
        BufferWithExtendableBuffer *const bufferToWrite,
        const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(),
    return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
            originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
            originalNode->getChildrenPos(), originalNode->getBigramsPos(),
            originalNode->getShortcutPos(), writingPos);
@@ -345,8 +331,9 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
        if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
            return false;
        }
        if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
                codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) {
        if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
                originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
                probability, &movedPos)) {
            return false;
        }
    }
@@ -374,8 +361,8 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
            1 /* arraySize */, &writingPos)) {
        return false;
    }
    if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability,
            &writingPos)) {
    if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
            probability, &writingPos)) {
        return false;
    }
    if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@@ -404,8 +391,9 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    // Write the 1st part of the reallocating node. The children position will be updated later
    // with actual children position.
    const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
    if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints,
            overlappingCodePointCount, newProbability, &writingPos)) {
    if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
            reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
            &writingPos)) {
        return false;
    }
    const int actualChildrenPos = writingPos;
@@ -417,14 +405,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    }
    // Write the 2nd part of the reallocating node.
    const int secondPartOfReallocatedPtNodePos = writingPos;
    if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
    if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
            firstPartOfReallocatedPtNodePos,
            reallocatingPtNodeCodePoints + overlappingCodePointCount,
            reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
            reallocatingPtNode->getProbability(), &writingPos)) {
        return false;
    }
    if (addsExtraChild) {
        if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
        if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
                newNodeCodePoints + overlappingCodePointCount,
                newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
                &writingPos)) {
@@ -452,4 +441,64 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    return true;
}

// Writes dictHeader followed by dictBody to a temporary file whose name is fileName with
// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE appended, and then renames the temporary file to
// fileName. When any step fails, the temporary file is removed and the rename is not performed,
// so a partially written file never replaces the file at fileName.
// TODO: Create a struct which contains header, body and etc... and use here as an argument.
void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName,
        BufferWithExtendableBuffer *const dictHeader,
        BufferWithExtendableBuffer *const dictBody) const {
    const int tmpFileNameBufSize = strlen(fileName)
            + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
    // Name of a temporary file used for writing that is a connected string of original name and
    // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
    char tmpFileName[tmpFileNameBufSize];
    snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    FILE *const file = fopen(tmpFileName, "wb");
    if (!file) {
        AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName);
        ASSERT(false);
        return;
    }
    // Write the dictionary header. writeBufferToFilePointer() closes the file on failure, so
    // only the temporary file has to be removed here.
    if (!writeBufferToFilePointer(file, dictHeader)) {
        remove(tmpFileName);
        AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
        ASSERT(false);
        return;
    }
    // Write the dictionary body.
    if (!writeBufferToFilePointer(file, dictBody)) {
        remove(tmpFileName);
        AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
        ASSERT(false);
        return;
    }
    // fclose() flushes data that is still buffered, so a failure here means the temporary file
    // may be incomplete and must not replace the existing dictionary file.
    if (fclose(file) != 0) {
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be closed.", tmpFileName);
        ASSERT(false);
        return;
    }
    if (rename(tmpFileName, fileName) != 0) {
        // Do not leave the temporary file behind when it could not be moved into place.
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be renamed to %s.", tmpFileName, fileName);
        ASSERT(false);
        return;
    }
}

// Writes the content of buffer to file: first the original buffer part, then the additional
// buffer part. A part whose size is not positive is skipped. When a write fails, this closes
// file and returns false; otherwise file is left open and true is returned.
bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file,
        const BufferWithExtendableBuffer *const buffer) const {
    // Original buffer part.
    const int originalPartSize = buffer->getOriginalBufferSize();
    if (originalPartSize > 0) {
        if (fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), originalPartSize, 1,
                file) < 1) {
            fclose(file);
            return false;
        }
    }
    // Additional buffer part, i.e. everything between the original buffer size and the tail.
    const int additionalPartSize =
            buffer->getTailPosition() - buffer->getOriginalBufferSize();
    if (additionalPartSize > 0) {
        if (fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), additionalPartSize, 1,
                file) < 1) {
            fclose(file);
            return false;
        }
    }
    return true;
}

// Placeholder for garbage collection of the dictionary starting at rootPtNodeArrayPos with
// bufferToWrite as the output buffer. Not implemented yet: neither argument is used and the
// method always returns false.
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
        BufferWithExtendableBuffer *const bufferToWrite) {
    // TODO: Implement.
    return false;
}

} // namespace latinime
+18 −5
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H

#include <cstdio>
#include <stdint.h>

#include "defines.h"
@@ -51,7 +52,8 @@ class DynamicPatriciaTrieWritingHelper {

    void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);

    void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy);
    void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
            const HeaderPolicy *const headerPolicy);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
@@ -66,15 +68,17 @@ class DynamicPatriciaTrieWritingHelper {
    bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
            const int movedPos, const int bigramLinkedNodePos);

    bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
    bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
            const bool isBlacklisted, const bool isNotAWord,
            const int parentPos,  const int *const codePoints, const int codePointCount,
            const int probability, const int childrenPos, const int originalBigramListPos,
            const int originalShortcutListPos, int *const writingPos);

    bool writePtNodeToBuffer(const int parentPos, const int *const codePoints,
            const int codePointCount, const int probability, int *const writingPos);
    bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
            const int parentPos, const int *const codePoints, const int codePointCount,
            const int probability, int *const writingPos);

    bool writePtNodeToBufferByCopyingPtNodeInfo(
    bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
            const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
            const int *const codePoints, const int codePointCount, const int probability,
            int *const writingPos);
@@ -97,6 +101,15 @@ class DynamicPatriciaTrieWritingHelper {
            const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
            const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
            const int newNodeCodePointCount);

    // Writes dictHeader and then dictBody to a temporary file and renames that file to
    // fileName.
    void flushAllToFile(const char *const fileName,
            BufferWithExtendableBuffer *const dictHeader,
            BufferWithExtendableBuffer *const dictBody) const;

    // Writes the original and additional parts of buffer to file. Closes file and returns false
    // when a write fails; returns true otherwise.
    bool writeBufferToFilePointer(FILE *const file,
            const BufferWithExtendableBuffer *const buffer) const;

    // Not implemented yet; the current definition always returns false.
    bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
Loading