Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b7e8a9ab authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Groundwork for implementing GC.

Bug: 6669677

Change-Id: Ia2b2f338795ab62cc9eef73731285b2a5a0ac5e4
parent 86780628
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -98,6 +98,13 @@ public:
           flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
           *outOffsetFieldSize = 1;
       }

       // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
       // writing.
       // TODO: Remove following 2 lines and optimize memory space.
       flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
       *outOffsetFieldSize = 3;

       *outBigramFlags = flags;
       *outOffset = absOffest;
       return true;
+4 −4
Original line number Diff line number Diff line
@@ -54,8 +54,8 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
    }
}

bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
        int *outBigramsCount) {
bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
        int *const fromPos, int *const toPos, int *const outBigramsCount) const {
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
    if (usesAdditionalBuffer) {
        *fromPos -= mBuffer->getOriginalBufferSize();
@@ -86,10 +86,10 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
            continue;
        }
        // Write bigram entry. Target buffer is always the additional buffer.
        if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
        if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
            return false;
        }
        if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
        if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
                toPos)) {
            return false;
        }
+5 −4
Original line number Diff line number Diff line
@@ -44,10 +44,11 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {

    void skipAllBigrams(int *const pos) const;

    // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
    // positions after bigram lists. This method skips invalid bigram entries and write the valid
    // bigram entry count to outBigramsCount.
    bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
    // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
    // bufferToWrite and advance these positions after bigram lists. This method skips invalid
    // bigram entries and writes the valid bigram entry count to outBigramsCount.
    bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
            int *const toPos, int *const outBigramsCount) const;

    bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);

+102 −53
Original line number Diff line number Diff line
@@ -97,8 +97,8 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
        return false;
    }
    int writingPos = newNodePos;
    // Write a new PtNode using original PtNode's info to the tail of the dictionary.
    if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
    // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
    if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
            mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
            &writingPos)) {
        return false;
@@ -143,38 +143,20 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
    if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
        return;
    }
    const int tmpFileNameBufSize = strlen(fileName)
            + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
    char tmpFileName[tmpFileNameBufSize];
    snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    FILE *const file = fopen(tmpFileName, "wb");
    if (!file) {
        return;
    flushAllToFile(fileName, &headerBuffer, mBuffer);
}
    // Write header.
    if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */),
            headerBuffer.getTailPosition(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);
        return;
    }
    // Write data in original buffer.
    if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
            mBuffer->getOriginalBufferSize(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);

void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
        const char *const fileName, const HeaderPolicy *const headerPolicy) {
    BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
        return;
    }
    // Write data in additional buffer.
    if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
            mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
        fclose(file);
        remove(tmpFileName);
    BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
        return;
    }
    fclose(file);
    rename(tmpFileName, fileName);
    flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
}

bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@@ -232,7 +214,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
}

// Write new PtNode at writingPos.
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted,
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
        BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
        const bool isNotAWord, const int parentPos, const int *const codePoints,
        const int codePointCount, const int probability, const int childrenPos,
        const int originalBigramListPos, const int originalShortcutListPos,
@@ -240,38 +223,39 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
    const int nodePos = *writingPos;
    // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
    // PtNode writing.
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
            writingPos)) {
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
            0 /* nodeFlags */, writingPos)) {
        return false;
    }
    // Calculate a parent offset and write the offset.
    const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
    if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBuffer,
    if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite,
            parentOffset, writingPos)) {
        return false;
    }
    // Write code points
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
            codePointCount, writingPos)) {
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
            codePoints, codePointCount, writingPos)) {
        return false;
    }
    // Write probability when the probability is a valid probability, which means this node is
    // terminal.
    if (probability != NOT_A_PROBABILITY) {
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
                probability, writingPos)) {
            return false;
        }
    }
    // Write children position
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
            childrenPos, writingPos)) {
        return false;
    }
    // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
    if (originalShortcutListPos != NOT_A_DICT_POS) {
        int fromPos = originalShortcutListPos;
        if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(&fromPos, writingPos)) {
        if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
                writingPos)) {
            return false;
        }
    }
@@ -279,7 +263,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
    int bigramCount = 0;
    if (originalBigramListPos != NOT_A_DICT_POS) {
        int fromPos = originalBigramListPos;
        if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
        if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
            return false;
        }
    }
@@ -291,27 +275,29 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
                    bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
                    CHILDREN_POSITION_FIELD_SIZE);
    int flagsFieldPos = nodePos;
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
            &flagsFieldPos)) {
        return false;
    }
    return true;
}

bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos,
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
        BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */,
            parentPos, codePoints, codePointCount, probability,
    return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
            false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
            NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
            NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
}

bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
        BufferWithExtendableBuffer *const bufferToWrite,
        const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(),
    return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
            originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
            originalNode->getChildrenPos(), originalNode->getBigramsPos(),
            originalNode->getShortcutPos(), writingPos);
@@ -345,8 +331,9 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
        if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
            return false;
        }
        if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
                codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) {
        if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
                originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
                probability, &movedPos)) {
            return false;
        }
    }
@@ -374,8 +361,8 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
            1 /* arraySize */, &writingPos)) {
        return false;
    }
    if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability,
            &writingPos)) {
    if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
            probability, &writingPos)) {
        return false;
    }
    if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@@ -404,8 +391,9 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    // Write the 1st part of the reallocating node. The children position will be updated later
    // with actual children position.
    const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
    if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints,
            overlappingCodePointCount, newProbability, &writingPos)) {
    if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
            reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
            &writingPos)) {
        return false;
    }
    const int actualChildrenPos = writingPos;
@@ -417,14 +405,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    }
    // Write the 2nd part of the reallocating node.
    const int secondPartOfReallocatedPtNodePos = writingPos;
    if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
    if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
            firstPartOfReallocatedPtNodePos,
            reallocatingPtNodeCodePoints + overlappingCodePointCount,
            reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
            reallocatingPtNode->getProbability(), &writingPos)) {
        return false;
    }
    if (addsExtraChild) {
        if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
        if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
                newNodeCodePoints + overlappingCodePointCount,
                newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
                &writingPos)) {
@@ -452,4 +441,64 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    return true;
}

// Writes dictHeader followed by dictBody to a temporary file and, only when everything has been
// written and the file has been closed successfully, renames the temporary file to fileName.
// The temporary file name is fileName with TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE appended.
// On any failure the temporary file is removed, the error is logged and the function returns
// without renaming anything onto fileName.
// TODO: Create a struct which contains header, body and etc... and use here as an argument.
void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName,
        BufferWithExtendableBuffer *const dictHeader,
        BufferWithExtendableBuffer *const dictBody) const {
    const int tmpFileNameBufSize = strlen(fileName)
            + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
    // Name of a temporary file used for writing that is a connected string of original name and
    // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
    char tmpFileName[tmpFileNameBufSize];
    snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    FILE *const file = fopen(tmpFileName, "wb");
    if (!file) {
        AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName);
        ASSERT(false);
        return;
    }
    // Write the dictionary header. writeBufferToFilePointer() has already closed the file when it
    // reports a failure, so only the temporary file itself has to be cleaned up here.
    if (!writeBufferToFilePointer(file, dictHeader)) {
        remove(tmpFileName);
        AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
        ASSERT(false);
        return;
    }
    // Write the dictionary body.
    if (!writeBufferToFilePointer(file, dictBody)) {
        remove(tmpFileName);
        AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
        ASSERT(false);
        return;
    }
    // fclose() flushes data that is still buffered, so a failure here means the temporary file
    // may be incomplete and must not replace the existing dictionary file.
    if (fclose(file) != 0) {
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be closed.", tmpFileName);
        ASSERT(false);
        return;
    }
    // Replace the dictionary file only after the temporary file is completely written.
    if (rename(tmpFileName, fileName) != 0) {
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be renamed to %s.", tmpFileName, fileName);
        ASSERT(false);
        return;
    }
}

// Writes the content of buffer to file: first the region returned by
// getBuffer(false /* usesAdditionalBuffer */), then the region returned by
// getBuffer(true /* usesAdditionalBuffer */). A region whose size is not positive is skipped.
// When a write fails, file is closed here and false is returned; on success file stays open and
// true is returned.
bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file,
        const BufferWithExtendableBuffer *const buffer) const {
    // Original buffer region.
    const int originalPartSize = buffer->getOriginalBufferSize();
    if (originalPartSize > 0) {
        const size_t writtenItemCount = fwrite(
                buffer->getBuffer(false /* usesAdditionalBuffer */), originalPartSize, 1, file);
        if (writtenItemCount < 1) {
            fclose(file);
            return false;
        }
    }
    // Additional buffer region: everything between the original buffer size and the tail.
    const int additionalPartSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
    if (additionalPartSize > 0) {
        const size_t writtenItemCount = fwrite(
                buffer->getBuffer(true /* usesAdditionalBuffer */), additionalPartSize, 1, file);
        if (writtenItemCount < 1) {
            fclose(file);
            return false;
        }
    }
    return true;
}

// Placeholder for GC. It is not implemented yet: neither rootPtNodeArrayPos nor bufferToWrite is
// used and the method always returns false.
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
        BufferWithExtendableBuffer *const bufferToWrite) {
    // TODO: Implement.
    return false;
}

} // namespace latinime
+18 −5
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H

#include <cstdio>
#include <stdint.h>

#include "defines.h"
@@ -51,7 +52,8 @@ class DynamicPatriciaTrieWritingHelper {

    void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);

    void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy);
    void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
            const HeaderPolicy *const headerPolicy);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
@@ -66,15 +68,17 @@ class DynamicPatriciaTrieWritingHelper {
    bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
            const int movedPos, const int bigramLinkedNodePos);

    bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
    bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
            const bool isBlacklisted, const bool isNotAWord,
            const int parentPos,  const int *const codePoints, const int codePointCount,
            const int probability, const int childrenPos, const int originalBigramListPos,
            const int originalShortcutListPos, int *const writingPos);

    bool writePtNodeToBuffer(const int parentPos, const int *const codePoints,
            const int codePointCount, const int probability, int *const writingPos);
    bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
            const int parentPos, const int *const codePoints, const int codePointCount,
            const int probability, int *const writingPos);

    bool writePtNodeToBufferByCopyingPtNodeInfo(
    bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
            const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
            const int *const codePoints, const int codePointCount, const int probability,
            int *const writingPos);
@@ -97,6 +101,15 @@ class DynamicPatriciaTrieWritingHelper {
            const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
            const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
            const int newNodeCodePointCount);

    // Writes dictHeader and then dictBody to a temporary file named fileName plus
    // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, and renames that file to fileName after both
    // buffers have been written. Nothing is renamed when opening or writing fails.
    void flushAllToFile(const char *const fileName,
            BufferWithExtendableBuffer *const dictHeader,
            BufferWithExtendableBuffer *const dictBody) const;

    // Writes the content of buffer to file. Closes file when a write fails and returns whether
    // the writing succeeded.
    bool writeBufferToFilePointer(FILE *const file,
            const BufferWithExtendableBuffer *const buffer) const;

    // Not implemented yet; the current definition always returns false.
    bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
Loading