Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 5931d46a authored by cketti's avatar cketti Committed by GitHub
Browse files

Merge pull request #3132 from k9mail/email_section_extractor

TextToHtml: Extract sections from a plain text email
parents 7af9ec1e cd26e829
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ dependencies {
    compile project(':k9mail-library')
    compile project(':plugins:HoloColorPicker')
    compile project(':plugins:openpgp-api-lib:openpgp-api')
    compile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
    compile "com.squareup.okio:okio:${okioVersion}"
    compile 'commons-io:commons-io:2.4'
    compile "com.android.support:support-v4:${androidSupportLibraryVersion}"
@@ -40,7 +41,6 @@ dependencies {

    androidTestCompile 'com.android.support.test.espresso:espresso-core:2.2.2'

    testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
    testCompile "org.robolectric:robolectric:${robolectricVersion}"
    testCompile "junit:junit:${junitVersion}"
    testCompile "com.google.truth:truth:${truthVersion}"
+117 −0
Original line number Diff line number Diff line
package com.fsck.k9.message.html

/**
 * Represents a section of an email's plain text body.
 *
 * A section does not copy any characters. It is a [CharSequence] view onto the text handed to the [Builder],
 * described by an ordered list of segments (index ranges into that text). The characters of all segments,
 * concatenated in the order they were added, form the content of the section.
 *
 * When the section is built, the smallest `leadingSpaces` value passed to [Builder.addSegment] is treated as the
 * common indentation and that many characters are skipped at the start of every segment.
 *
 * See [EmailSectionExtractor].
 */
class EmailSection private constructor(builder: Builder) : CharSequence {
    /** The quote depth this section was created with (see [Builder.quoteDepth]). */
    val quoteDepth = builder.quoteDepth
    private val text = builder.text

    // Strip the common indentation by moving the start of every segment forward. If the builder is empty its
    // indent is still Int.MAX_VALUE, but then there is nothing to map over.
    private val segments: List<Segment> = if (builder.indent == 0) {
        builder.segments.toList()
    } else {
        builder.segments.map { Segment(it.startIndex + builder.indent, it.endIndex) }
    }

    /** Total number of characters in all segments. */
    override val length = segments.fold(0) { total, segment -> total + segment.length }

    /**
     * Returns the character at [index] of the section's content.
     *
     * @throws IllegalArgumentException if [index] is not in `0 until length`.
     */
    override fun get(index: Int): Char {
        require(index in 0 until length) { "index: $index; length: $length" }

        // Walk the segments, reducing the offset by the length of every segment that is skipped.
        var offset = index
        for (segment in segments) {
            if (offset < segment.length) {
                return text[segment.startIndex + offset]
            }
            offset -= segment.length
        }

        throw AssertionError()
    }

    /**
     * Returns the characters from [startIndex] (inclusive) to [endIndex] (exclusive).
     *
     * An empty range yields an empty string, including the range `length..length` at the very end of the
     * section and `0..0` on an empty section. The full range yields this instance. Every other range yields a new
     * [EmailSection] with the same [quoteDepth] that shares the underlying text.
     *
     * @throws IllegalArgumentException if an index is outside `0..length` or [startIndex] is larger than
     *   [endIndex].
     */
    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
        // startIndex == length is valid as long as endIndex == length as well (empty range at the end).
        require(startIndex in 0..length) { "startIndex: $startIndex; length: $length" }
        require(endIndex in 0..length) { "endIndex: $endIndex; length: $length" }
        require(startIndex <= endIndex) { "startIndex > endIndex" }

        if (startIndex == endIndex) return ""
        if (startIndex == 0 && endIndex == length) return this

        val builder = Builder(text, quoteDepth)

        val (startSegmentIndex, startOffset) = findSegmentIndexAndOffset(startIndex)
        val (endSegmentIndex, endOffset) = findSegmentIndexAndOffset(endIndex, isEndIndex = true)
        val startSegment = segments[startSegmentIndex]

        // The segments stored in this instance already have their indentation removed, so everything added to
        // the new builder uses zero leading spaces.
        if (startSegmentIndex == endSegmentIndex) {
            builder.addSegment(0, startSegment.startIndex + startOffset, startSegment.startIndex + endOffset)
            return builder.build()
        }

        if (startOffset == 0) {
            builder.addSegment(startSegment)
        } else {
            builder.addSegment(0, startSegment.startIndex + startOffset, startSegment.endIndex)
        }

        for (segmentIndex in startSegmentIndex + 1 until endSegmentIndex) {
            builder.addSegment(segments[segmentIndex])
        }

        val endSegment = segments[endSegmentIndex]
        if (endSegment.startIndex + endOffset == endSegment.endIndex) {
            builder.addSegment(endSegment)
        } else {
            builder.addSegment(0, endSegment.startIndex, endSegment.startIndex + endOffset)
        }

        return builder.build()
    }

    /**
     * Maps an index into the section's content to the index of the segment containing it and the offset inside
     * that segment.
     *
     * With [isEndIndex] set, an index pointing directly behind the last character of a segment is attributed to
     * that segment instead of the following one.
     */
    private fun findSegmentIndexAndOffset(index: Int, isEndIndex: Boolean = false): Pair<Int, Int> {
        var offset = index
        for ((segmentIndex, segment) in segments.withIndex()) {
            val segmentLength = segment.length
            if (offset < segmentLength || (isEndIndex && offset == segmentLength)) {
                return Pair(segmentIndex, offset)
            }
            offset -= segmentLength
        }

        throw AssertionError()
    }

    /** Returns the content of all segments concatenated into a new string. */
    override fun toString(): String {
        val result = StringBuilder(length)
        for (segment in segments) {
            result.append(text, segment.startIndex, segment.endIndex)
        }
        return result.toString()
    }


    /** Index range `startIndex until endIndex` into the underlying text. */
    internal data class Segment(val startIndex: Int, val endIndex: Int) {
        val length: Int
            get() = endIndex - startIndex
    }

    /**
     * Collects the segments of an [EmailSection].
     *
     * @param text The text all segment indexes refer to.
     * @param quoteDepth The quote depth reported by the built section.
     */
    class Builder(val text: String, val quoteDepth: Int) {
        internal val segments: MutableList<Segment> = mutableListOf()

        // Smallest number of leading spaces seen so far. Int.MAX_VALUE means "no segment added yet".
        internal var indent = Int.MAX_VALUE

        /** `true` once at least one segment has been added. */
        val hasSegments
            get() = !segments.isEmpty()

        /**
         * Adds the range `startIndex until endIndex` of [text] as the next segment.
         *
         * @param leadingSpaces Number of spaces at the start of the range. The minimum over all segments is
         *   removed from the start of every segment when the section is built.
         */
        fun addSegment(leadingSpaces: Int, startIndex: Int, endIndex: Int): Builder {
            indent = minOf(indent, leadingSpaces)
            segments.add(Segment(startIndex, endIndex))
            return this
        }

        /** Adds an existing segment as is; this disables indentation stripping for the built section. */
        internal fun addSegment(segment: Segment) {
            indent = 0
            segments.add(segment)
        }

        /** Creates the [EmailSection] from the segments added so far. */
        fun build() = EmailSection(this)
    }
}
+125 −0
Original line number Diff line number Diff line
package com.fsck.k9.message.html

/**
 * Extract sections from a plain text email.
 *
 * A section consists of all consecutive lines of the same quote depth. Quote characters and spaces at the beginning of
 * a line are stripped and not part of the section's content. Line breaks are kept as part of the content.
 *
 * ### Example:
 *
 * ```
 * On 2018-01-25 Alice <alice@example.com> wrote:
 * > Hi Bob
 *
 * Hi Alice
 * ```
 *
 * This message consists of three sections with the following contents (line breaks omitted):
 * * `On 2018-01-25 Alice <alice@example.com> wrote:`
 * * `Hi Bob`
 * * `Hi Alice`
 */
class EmailSectionExtractor private constructor(val text: String) {
    private val sections = mutableListOf<EmailSection>()
    private var sectionBuilder = EmailSection.Builder(text, 0)

    // Start of the current section; only used to emit an unquoted section as one single segment.
    private var sectionStartIndex = 0

    // Index of the most recently seen '\n', or -1 while still on the first line.
    private var newlineIndex = -1

    // Start of the current line's content, including the spaces following the last quote character.
    private var startOfContentIndex = 0

    // True while only spaces and quote characters have been seen on the current line.
    private var isStartOfLine = true

    // Number of spaces seen since the start of the line or the last quote character.
    private var spaces = 0

    // Quote depth of the section currently being built.
    private var quoteDepth = 0

    // Number of quote characters seen so far on the current line.
    private var currentQuoteDepth = 0

    /**
     * Scans [text] once and returns its sections in order of appearance.
     */
    fun extract(): List<EmailSection> {
        text.forEachIndexed { index, character ->
            if (isStartOfLine) {
                detectQuoteCharacters(index, character)
            } else if (character == '\n') {
                addQuotedLineToSection(endIndex = index + 1)
            }

            if (character == '\n') {
                newlineIndex = index
                resetForStartOfLine()
            }
        }

        completeLastSection()

        return sections
    }

    /**
     * Handles a character in the leading part of a line, i.e. before any content character was seen.
     */
    private fun detectQuoteCharacters(index: Int, character: Char) {
        when (character) {
            ' ' -> spaces++
            '>' -> {
                currentQuoteDepth++
                spaces = 0
            }
            '\n' -> {
                // The line consists of nothing but quote characters and spaces.
                if (quoteDepth != currentQuoteDepth) {
                    // The previous section ends where this line starts. Ending it behind this line's newline
                    // would pull the quote characters of this line into a preceding unquoted section.
                    finishSection(newlineIndex + 1)
                    sectionStartIndex = index - spaces
                }
                // No-op for unquoted lines; for quoted lines the blank line becomes part of the (possibly just
                // started) quoted section.
                addQuotedLineToSection(startIndex = index - spaces, endIndex = index + 1)
            }
            else -> {
                isStartOfLine = false
                startOfContentIndex = index - spaces
                if (quoteDepth != currentQuoteDepth) {
                    finishSection(newlineIndex + 1)
                    sectionStartIndex = startOfContentIndex
                }
            }
        }
    }

    /**
     * Adds everything from the start of the current section up to [endIndex] as one segment, but only if the
     * current section is unquoted and the range is not empty.
     */
    private fun addUnquotedLineToSection(endIndex: Int) {
        if (quoteDepth == 0 && sectionStartIndex != endIndex) {
            sectionBuilder.addSegment(0, sectionStartIndex, endIndex)
        }
    }

    /**
     * Adds the current line as a segment if it is a quoted line. Unquoted lines are ignored here because unquoted
     * sections are added as a whole by [addUnquotedLineToSection].
     */
    private fun addQuotedLineToSection(startIndex: Int = startOfContentIndex, endIndex: Int) {
        if (currentQuoteDepth > 0) {
            sectionBuilder.addSegment(spaces, startIndex, endIndex)
        }
    }

    /**
     * Completes the current section and starts a new one with the quote depth of the current line.
     *
     * @param endIndex End of the current section; only relevant if that section is unquoted.
     */
    private fun finishSection(endIndex: Int) {
        addUnquotedLineToSection(endIndex)
        appendSection()
        sectionBuilder = EmailSection.Builder(text, currentQuoteDepth)
        quoteDepth = currentQuoteDepth
    }

    /**
     * Flushes whatever has not been added to a section when the end of [text] is reached.
     */
    private fun completeLastSection() {
        if (quoteDepth == 0) {
            // An unquoted section is only ever emitted when it ends, so it has to be flushed here even if the
            // text ends with a line break. If the unfinished last line carries quote characters but no content
            // it doesn't belong to the unquoted section, which then ends with the previous line.
            val endIndex = if (isStartOfLine && currentQuoteDepth > 0) newlineIndex + 1 else text.length
            addUnquotedLineToSection(endIndex)
        } else if (!isStartOfLine) {
            // Last quoted line without a trailing line break.
            sectionBuilder.addSegment(spaces, startOfContentIndex, text.length)
        }

        appendSection()
    }

    /** Adds the section under construction to the result unless it is empty. */
    private fun appendSection() {
        if (sectionBuilder.hasSegments) {
            sections.add(sectionBuilder.build())
        }
    }

    /** Resets the per-line state after a line break. */
    private fun resetForStartOfLine() {
        isStartOfLine = true
        currentQuoteDepth = 0
        spaces = 0
    }

    companion object {
        /** Splits [text] into sections of consecutive lines with the same quote depth. */
        fun extract(text: String) = EmailSectionExtractor(text).extract()
    }
}
+113 −0
Original line number Diff line number Diff line
package com.fsck.k9.message.html


import com.google.common.truth.Truth.assertThat
import org.junit.Test


/**
 * Tests for [EmailSectionExtractor]: each test feeds a plain text message to the extractor and verifies the quote
 * depth and content of every returned section.
 */
class EmailSectionExtractorTest {
    @Test
    fun simpleMessageWithoutQuotes() {
        val message = """
            Hi Alice,

            are we still on for new Thursday?

            Best
            Bob
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(1)
        assertSection(sections[0], expectedQuoteDepth = 0, expectedText = message)
    }

    @Test
    fun quoteFollowedByReply() {
        val message = """
            Alice <alice@example.org> wrote:
            > Hi there

            Hi, what's up?
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(3)
        assertSection(sections[0], expectedQuoteDepth = 0, expectedText = "Alice <alice@example.org> wrote:\n")
        assertSection(sections[1], expectedQuoteDepth = 1, expectedText = "Hi there\n")
        assertSection(sections[2], expectedQuoteDepth = 0, expectedText = "\nHi, what's up?")
    }

    @Test
    fun replyFollowedByTwoQuoteLevels() {
        val message = """
            Three

            Bob <bob@example.org> wrote:
            > Two
            >${" "}
            > Alice <alice@example.org> wrote:
            >> One
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(3)
        assertSection(sections[0], expectedQuoteDepth = 0, expectedText = "Three\n\nBob <bob@example.org> wrote:\n")
        assertSection(sections[1], expectedQuoteDepth = 1, expectedText = "Two\n\nAlice <alice@example.org> wrote:\n")
        assertSection(sections[2], expectedQuoteDepth = 2, expectedText = "One")
    }

    @Test
    fun chaosQuoting() {
        val message = """
            >>> One
            > Three
            Four
            >> Two${"\n"}
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(4)
        assertSection(sections[0], expectedQuoteDepth = 3, expectedText = "One\n")
        assertSection(sections[1], expectedQuoteDepth = 1, expectedText = "Three\n")
        assertSection(sections[2], expectedQuoteDepth = 0, expectedText = "Four\n")
        assertSection(sections[3], expectedQuoteDepth = 2, expectedText = "Two\n")
    }


    /** Asserts the quote depth and the string content of a single extracted section. */
    private fun assertSection(section: EmailSection, expectedQuoteDepth: Int, expectedText: String) {
        assertThat(section.quoteDepth).isEqualTo(expectedQuoteDepth)
        assertThat(section.toString()).isEqualTo(expectedText)
    }
}
+94 −0
Original line number Diff line number Diff line
package com.fsck.k9.message.html


import com.google.common.truth.Truth.assertThat
import org.junit.Test


/**
 * Tests for the [CharSequence] implementation of [EmailSection].
 *
 * Sections are described by a small pattern language: text between `[` and `]` is a segment, spaces directly after
 * `[` are the segment's leading spaces, everything else is text outside of the section.
 */
class EmailSectionTest {
    @Test
    fun charAt() {
        assertCharAt(0, 'a', "[a]", ".[a]", "[a].", "[ a]", "[abc]")

        assertCharAt(
                1, 'b',
                "[a][b]", "[a][bc]", "[ab]", "[ab][c]", "[a][b][c]", ".[a][b][c]", ".[a].[b][c]", ".[a].[b].[c]",
                "[ a][ b][ c]", "[a]..[bc]"
        )

        assertCharAt(2, 'c', "[abc]", "[ab][c]", "[a][bc]", "[a][b][c]", ".[a].[b].[c].", "[  a][  b][  c]")
    }

    @Test
    fun length() {
        val expectedLengths = listOf(
                "[]" to 0,
                "...[]..." to 0,
                "[  ]" to 0,
                "[ ][  ]" to 1,
                "[One]" to 3,
                "[One][Two]" to 6
        )

        for ((pattern, expectedLength) in expectedLengths) {
            assertThat(pattern.asEmailSection().length).isEqualTo(expectedLength)
        }
    }

    @Test
    fun subSequence() {
        val section = "[ One][ Two][ Three]".asEmailSection()

        assertThat(section.subSequence(0, 11)).isSameAs(section)

        val expectedSubSequences = listOf(
                Triple(0, 3, "One"),
                Triple(0, 2, "On"),
                Triple(1, 3, "ne"),
                Triple(1, 2, "n"),
                Triple(0, 4, "OneT"),
                Triple(1, 4, "neT"),
                Triple(1, 6, "neTwo"),
                Triple(1, 7, "neTwoT"),
                Triple(1, 11, "neTwoThree"),
                Triple(3, 11, "TwoThree"),
                Triple(4, 11, "woThree"),
                Triple(4, 9, "woThr"),
                Triple(6, 9, "Thr"),
                Triple(7, 10, "hre"),
                Triple(6, 11, "Three")
        )
        for ((start, end, expected) in expectedSubSequences) {
            assertThat(section.subSequence(start, end).asString()).isEqualTo(expected)
        }
    }


    /** Asserts that every pattern, converted to a section, has [expected] at position [index]. */
    private fun assertCharAt(index: Int, expected: Char, vararg patterns: String) {
        for (pattern in patterns) {
            assertThat(pattern.asEmailSection()[index]).isEqualTo(expected)
        }
    }

    /** Builds a string by reading the sequence character by character instead of relying on `toString()`. */
    private fun CharSequence.asString(): String {
        val characters = StringBuilder(length)
        for (character in this) {
            characters.append(character)
        }
        return characters.toString()
    }

    /** Converts a pattern (see class documentation) to an [EmailSection] with quote depth 0. */
    private fun String.asEmailSection(): EmailSection {
        val sectionBuilder = EmailSection.Builder(this, 0)

        var segmentStart = -1
        var onlySpacesSoFar = true
        var leadingSpaces = 0
        for ((position, character) in this.withIndex()) {
            if (character == '[') {
                // A new segment starts behind the bracket.
                segmentStart = position + 1
                onlySpacesSoFar = true
                leadingSpaces = 0
            } else if (character == ' ') {
                if (onlySpacesSoFar) leadingSpaces++
            } else if (character == ']') {
                sectionBuilder.addSegment(leadingSpaces, segmentStart, position)
            } else {
                onlySpacesSoFar = false
            }
        }

        return sectionBuilder.build()
    }
}