Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ad518d9a authored by Keisuke Kuroyanagi
Browse files

Avoid copying bigram list if possible.

Constructing en_US main dict using dicttool:
Before:
real    1m8.699s
user    1m10.600s
sys     0m2.390s
After:
real    0m17.204s
user    0m20.560s
sys     0m0.720s


Bug: 13406708
Change-Id: I3b0476be57e5cb93c6497025b3ffa7064ac326c6
parent 8d8fb396
Loading
Loading
Loading
Loading
+60 −40
Original line number Diff line number Diff line
@@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out

bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
        const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
    // 1. The word has no bigrams yet.
    // 2. The word has bigrams, and there is the target in the list.
    // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
    // 4. The word has bigrams. We have to append new bigram entry to the list.
    // 5. Same as 4, but the list is the last entry of the content file.

    if (outAddedNewEntry) {
        *outAddedNewEntry = false;
    }
    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (bigramListPos == NOT_A_DICT_POS) {
        // Updating PtNode that doesn't have a bigram list.
        // Case 1. PtNode that doesn't have a bigram list.
        // Create new bigram list.
        if (!mBigramDictContent->createNewBigramList(terminalId)) {
            return false;
@@ -75,37 +81,26 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
        return true;
    }

    const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
    if (entryPosToUpdate != NOT_A_DICT_POS) {
        // Overwrite existing entry.
        const BigramEntry originalBigramEntry =
                mBigramDictContent->getBigramEntry(entryPosToUpdate);
        if (!originalBigramEntry.isValid()) {
            // Reuse invalid entry.
            if (outAddedNewEntry) {
                *outAddedNewEntry = true;
            }
        }
        const BigramEntry updatedBigramEntry =
                originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
                &updatedBigramEntry, newProbability, timestamp);
        return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
    }

    int tailEntryPos = NOT_A_DICT_POS;
    const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
            &tailEntryPos);
    if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
        // Case 4, 5.
        // Add new entry to the bigram list.
    // Create new bigram list.
        if (tailEntryPos == NOT_A_DICT_POS) {
            // Case 4. Create new bigram list.
            if (!mBigramDictContent->createNewBigramList(terminalId)) {
                return false;
            }
    int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    int tailEntryPos = NOT_A_DICT_POS;
            const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
            // Copy existing bigram list.
    if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) {
            if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
                return false;
            }
        }
        // Write new entry at the tail position of the bigram content.
    const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
        const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                newTargetTerminalId);
        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
                &newBigramEntry, newProbability, timestamp);
        if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
@@ -121,13 +116,30 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
        return true;
    }

    // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
    const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
    if (!originalBigramEntry.isValid()) {
        // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
        // entry is updated.
        if (outAddedNewEntry) {
            *outAddedNewEntry = true;
        }
    }
    const BigramEntry updatedBigramEntry =
            originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
    const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
            &updatedBigramEntry, newProbability, timestamp);
    return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}

bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (bigramListPos == NOT_A_DICT_POS) {
        // Bigram list doesn't exist.
        return false;
    }
    const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
    const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
            nullptr /* outTailEntryPos */);
    if (entryPosToUpdate == NOT_A_DICT_POS) {
        // Bigram entry doesn't exist.
        return false;
@@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
}

int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
        const int bigramListPos) const {
        const int bigramListPos, int *const outTailEntryPos) const {
    if (outTailEntryPos) {
        *outTailEntryPos = NOT_A_DICT_POS;
    }
    bool hasNext = true;
    int invalidEntryPos = NOT_A_DICT_POS;
    int readingPos = bigramListPos;
@@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
            // Invalid entry that can be reused is found.
            invalidEntryPos = entryPos;
        }
        if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
            if (outTailEntryPos) {
                *outTailEntryPos = entryPos;
            }
        }
    }
    return invalidEntryPos;
}
+2 −1
Original line number Diff line number Diff line
@@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);

    int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
    int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
            int *const outTailEntryPos) const;

    const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
            const int newProbability, const int timestamp) const;
+4 −0
Original line number Diff line number Diff line
@@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent {
            const BigramDictContent *const originalBigramDictContent,
            int *const outBigramEntryCount);

    // Returns true if pos is equal to the tail position reported by the content buffer
    // (getContentBuffer()->getTailPosition()), false otherwise.
    bool isContentTailPos(const int pos) const {
        return pos == getContentBuffer()->getTailPosition();
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);