Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7b01862a authored by yanglv's avatar yanglv
Browse files

Mms: Web link isn't recognized when it directly follows Chinese text without a space

If a web URL starts with "www." and directly follows Chinese characters
without a space, the original regex cannot match it correctly, and the
whole run of text, Chinese characters included, is treated as a link.

Change the regex matcher so that links cannot contain Chinese characters.

Change-Id: I7145e158ae410641921fcfc93aaca7250ecf492c
parent f3b7c05d
Loading
Loading
Loading
Loading
+11 −6
Original line number Diff line number Diff line
@@ -107,16 +107,24 @@ public class Patterns {
    public static final String GOOD_IRI_CHAR =
        // Body of a regex character class: ASCII letters and digits plus the
        // non-ASCII ranges U+00A0-U+D7FF, U+F900-U+FDCF and U+FDF0-U+FFEF.
        "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    /**
     * Body of a regex character class (meant to be wrapped in {@code [...]})
     * used for the first character of each label of a named host in
     * {@code WEB_URL}. It lists the same ranges as {@link #GOOD_IRI_CHAR}
     * except that U+4E00 through U+9FA5 is left out, so Chinese characters
     * in that range cannot start a host label.
     * @hide
     */
    public static final String GOOD_IRI_HOST_CHAR =
        "a-zA-Z0-9\u00A0-\u4DFF\u9FA6-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    /**
     *  Regular expression pattern to match most parts of RFC 3987
     *  Internationalized URLs, aka IRIs.  Commonly used Unicode characters are
     *  added.
     */
    public static final Pattern WEB_URL = Pattern.compile(
        "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
        "((?=[^\u0391-\uffe5])(?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/"
        + "(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
        + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
        + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
        + "((?:(?:[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,64}\\.)+"   // named host
        + "((?:(?:[" + GOOD_IRI_HOST_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,64}\\.)+"   // named host
        + TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL
        + "|(?:(?:25[0-5]|2[0-4]" // or ip address
        + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
@@ -125,10 +133,7 @@ public class Patterns {
        + "|[1-9][0-9]|[0-9])))"
        + "(?:\\:\\d{1,5})?)" // plus option port number
        + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus option query params
        + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
        + "(?:\\b|$)"); // and finally, a word boundary or end of
                        // input.  This is to stop foo.sure from
                        // matching as foo.su
        + "\\-\\.\\+\\!\\*\\'\\(\\)\\_])|(?:\\,[" + GOOD_IRI_CHAR + "])|(?:\\%[a-fA-F0-9]{2}))*)?");

    public static final Pattern IP_ADDRESS
        = Pattern.compile(