Loading native/jni/src/correction.cpp +5 −42 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ #include "defines.h" #include "proximity_info_state.h" #include "suggest_utils.h" #include "suggest/policyimpl/utils/edit_distance.h" #include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" namespace latinime { Loading Loading @@ -906,50 +908,11 @@ inline static bool isUpperCase(unsigned short c) { return totalFreq; } /* Damerau-Levenshtein distance */ inline static int editDistanceInternal(int *editDistanceTable, const int *before, const int beforeLength, const int *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] int *dp = editDistanceTable; const int li = beforeLength + 1; const int lo = afterLength + 1; for (int i = 0; i < li; ++i) { dp[lo * i] = i; } for (int i = 0; i < lo; ++i) { dp[i] = i; } for (int i = 0; i < li - 1; ++i) { for (int j = 0; j < lo - 1; ++j) { const int ci = toBaseLowerCase(before[i]); const int co = toBaseLowerCase(after[j]); const int cost = (ci == co) ? 0 : 1; dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) && co == toBaseLowerCase(before[i - 1])) { dp[(i + 1) * lo + (j + 1)] = min( dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); } } } if (DEBUG_EDIT_DISTANCE) { AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); for (int i = 0; i < li; ++i) { for (int j = 0; j < lo; ++j) { AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); } } } return dp[li * lo - 1]; } /* static */ int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength, const int *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( before, beforeLength, after, afterLength); return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein)); } Loading native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h 0 → 100644 +79 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H #define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H #include "char_utils.h" #include "suggest/policyimpl/utils/edit_distance_policy.h" namespace latinime { class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy { public: DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0, const int *const string1, const int length1) : mString0(string0), mString0Length(length0), mString1(string1), mString1Length(length1) {} ~DamerauLevenshteinEditDistancePolicy() {} AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const { const int c0 = toBaseLowerCase(mString0[index0]); const int c1 = toBaseLowerCase(mString1[index1]); return (c0 == c1) ? 0.0f : 1.0f; } AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const { return 1.0f; } AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const { return 1.0f; } AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const { const int c0 = toBaseLowerCase(mString0[index0]); const int c1 = toBaseLowerCase(mString1[index1]); if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1]) && c1 == toBaseLowerCase(mString0[index0 - 1])) { return true; } return false; } AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const { return getSubstitutionCost(index0, index1); } AK_FORCE_INLINE int getString0Length() const { return mString0Length; } AK_FORCE_INLINE int getString1Length() const { return mString1Length; } private: DISALLOW_COPY_AND_ASSIGN (DamerauLevenshteinEditDistancePolicy); const int *const mString0; const int mString0Length; const int *const mString1; const int mString1Length; }; } // namespace latinime #endif // LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H native/jni/src/suggest/policyimpl/utils/edit_distance.h 0 → 100644 +70 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_EDIT_DISTANCE_H #define LATINIME_EDIT_DISTANCE_H #include "defines.h" #include "suggest/policyimpl/utils/edit_distance_policy.h" namespace latinime { class EditDistance { public: // CAVEAT: There may be performance penalty if you need the edit distance as an integer value. AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) { const int beforeLength = policy->getString0Length(); const int afterLength = policy->getString1Length(); float dp[(beforeLength + 1) * (afterLength + 1)]; for (int i = 0; i <= beforeLength; ++i) { dp[(afterLength + 1) * i] = i * policy->getInsertionCost(i - 1, -1); } for (int i = 0; i <= afterLength; ++i) { dp[i] = i * policy->getDeletionCost(-1, i - 1); } for (int i = 0; i < beforeLength; ++i) { for (int j = 0; j < afterLength; ++j) { dp[(afterLength + 1) * (i + 1) + (j + 1)] = min( dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j), min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j), dp[(afterLength + 1) * i + j] + policy->getSubstitutionCost(i, j))); if (policy->allowTransposition(i, j)) { dp[(afterLength + 1) * (i + 1) + (j + 1)] = min( dp[(afterLength + 1) * (i + 1) + (j + 1)], dp[(afterLength + 1) * (i - 1) + (j - 1)] + policy->getTranspositionCost(i, j)); } } } if (DEBUG_EDIT_DISTANCE) { AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); for (int i = 0; i < beforeLength + 1; ++i) { for (int j = 0; j < afterLength + 1; ++j) { AKLOGI("EDIT[%d][%d], %f", i, j, dp[(afterLength + 1) * i + j]); } } } return dp[(beforeLength + 1) * (afterLength + 1) - 1]; } private: DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance); }; } // namespace latinime #endif // LATINIME_EDIT_DISTANCE_H native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h 0 → 100644 +43 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_EDIT_DISTANCE_POLICY_H #define LATINIME_EDIT_DISTANCE_POLICY_H #include "defines.h" namespace latinime { class EditDistancePolicy { public: virtual float getSubstitutionCost(const int index0, const int index1) const = 0; virtual float getDeletionCost(const int index0, const int index1) const = 0; virtual float getInsertionCost(const int index0, const int index1) const = 0; virtual bool allowTransposition(const int index0, const int index1) const = 0; virtual float getTranspositionCost(const int index0, const int index1) const = 0; virtual int getString0Length() const = 0; virtual int getString1Length() const = 0; protected: EditDistancePolicy() {} virtual ~EditDistancePolicy() {} private: DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy); }; } // namespace latinime #endif // LATINIME_EDIT_DISTANCE_POLICY_H Loading
native/jni/src/correction.cpp +5 −42 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ #include "defines.h" #include "proximity_info_state.h" #include "suggest_utils.h" #include "suggest/policyimpl/utils/edit_distance.h" #include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" namespace latinime { Loading Loading @@ -906,50 +908,11 @@ inline static bool isUpperCase(unsigned short c) { return totalFreq; } /* Damerau-Levenshtein distance */ inline static int editDistanceInternal(int *editDistanceTable, const int *before, const int beforeLength, const int *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] int *dp = editDistanceTable; const int li = beforeLength + 1; const int lo = afterLength + 1; for (int i = 0; i < li; ++i) { dp[lo * i] = i; } for (int i = 0; i < lo; ++i) { dp[i] = i; } for (int i = 0; i < li - 1; ++i) { for (int j = 0; j < lo - 1; ++j) { const int ci = toBaseLowerCase(before[i]); const int co = toBaseLowerCase(after[j]); const int cost = (ci == co) ? 0 : 1; dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) && co == toBaseLowerCase(before[i - 1])) { dp[(i + 1) * lo + (j + 1)] = min( dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); } } } if (DEBUG_EDIT_DISTANCE) { AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); for (int i = 0; i < li; ++i) { for (int j = 0; j < lo; ++j) { AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); } } } return dp[li * lo - 1]; } /* static */ int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength, const int *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( before, beforeLength, after, afterLength); return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein)); } Loading
native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h 0 → 100644 +79 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H #define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H #include "char_utils.h" #include "suggest/policyimpl/utils/edit_distance_policy.h" namespace latinime { class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy { public: DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0, const int *const string1, const int length1) : mString0(string0), mString0Length(length0), mString1(string1), mString1Length(length1) {} ~DamerauLevenshteinEditDistancePolicy() {} AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const { const int c0 = toBaseLowerCase(mString0[index0]); const int c1 = toBaseLowerCase(mString1[index1]); return (c0 == c1) ? 0.0f : 1.0f; } AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const { return 1.0f; } AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const { return 1.0f; } AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const { const int c0 = toBaseLowerCase(mString0[index0]); const int c1 = toBaseLowerCase(mString1[index1]); if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1]) && c1 == toBaseLowerCase(mString0[index0 - 1])) { return true; } return false; } AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const { return getSubstitutionCost(index0, index1); } AK_FORCE_INLINE int getString0Length() const { return mString0Length; } AK_FORCE_INLINE int getString1Length() const { return mString1Length; } private: DISALLOW_COPY_AND_ASSIGN (DamerauLevenshteinEditDistancePolicy); const int *const mString0; const int mString0Length; const int *const mString1; const int mString1Length; }; } // namespace latinime #endif // LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
native/jni/src/suggest/policyimpl/utils/edit_distance.h 0 → 100644 +70 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_EDIT_DISTANCE_H #define LATINIME_EDIT_DISTANCE_H #include "defines.h" #include "suggest/policyimpl/utils/edit_distance_policy.h" namespace latinime { class EditDistance { public: // CAVEAT: There may be performance penalty if you need the edit distance as an integer value. AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) { const int beforeLength = policy->getString0Length(); const int afterLength = policy->getString1Length(); float dp[(beforeLength + 1) * (afterLength + 1)]; for (int i = 0; i <= beforeLength; ++i) { dp[(afterLength + 1) * i] = i * policy->getInsertionCost(i - 1, -1); } for (int i = 0; i <= afterLength; ++i) { dp[i] = i * policy->getDeletionCost(-1, i - 1); } for (int i = 0; i < beforeLength; ++i) { for (int j = 0; j < afterLength; ++j) { dp[(afterLength + 1) * (i + 1) + (j + 1)] = min( dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j), min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j), dp[(afterLength + 1) * i + j] + policy->getSubstitutionCost(i, j))); if (policy->allowTransposition(i, j)) { dp[(afterLength + 1) * (i + 1) + (j + 1)] = min( dp[(afterLength + 1) * (i + 1) + (j + 1)], dp[(afterLength + 1) * (i - 1) + (j - 1)] + policy->getTranspositionCost(i, j)); } } } if (DEBUG_EDIT_DISTANCE) { AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); for (int i = 0; i < beforeLength + 1; ++i) { for (int j = 0; j < afterLength + 1; ++j) { AKLOGI("EDIT[%d][%d], %f", i, j, dp[(afterLength + 1) * i + j]); } } } return dp[(beforeLength + 1) * (afterLength + 1) - 1]; } private: DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance); }; } // namespace latinime #endif // LATINIME_EDIT_DISTANCE_H
native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h 0 → 100644 +43 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_EDIT_DISTANCE_POLICY_H #define LATINIME_EDIT_DISTANCE_POLICY_H #include "defines.h" namespace latinime { class EditDistancePolicy { public: virtual float getSubstitutionCost(const int index0, const int index1) const = 0; virtual float getDeletionCost(const int index0, const int index1) const = 0; virtual float getInsertionCost(const int index0, const int index1) const = 0; virtual bool allowTransposition(const int index0, const int index1) const = 0; virtual float getTranspositionCost(const int index0, const int index1) const = 0; virtual int getString0Length() const = 0; virtual int getString1Length() const = 0; protected: EditDistancePolicy() {} virtual ~EditDistancePolicy() {} private: DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy); }; } // namespace latinime #endif // LATINIME_EDIT_DISTANCE_POLICY_H