Changeset 647 for trunk/lib


Ignore:
Timestamp:
Oct 25, 2018 12:35:07 AM (6 years ago)
Author:
anonymous
Message:

Update hyperlinkTxt() URL look-behind

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/Utilities.inc.php

    r644 r647  
    252252* @return   string          Same input text, but URLs hyperlinked.
    253253* @author   Quinn Comendant <quinn@strangecode.com>
    254 * @version  2.0
     254* @version  2.2
    255255* @since    22 Mar 2015 23:29:04
    256256*/
     
    261261
    262262    // Capture the full URL into the first match and only the first X characters into the second match.
    263     // This will match URLs not preceeded by " ' or = (URLs inside an attribute) or ` (Markdown quoted) or double-scheme (http://http://www.asdf.com)
     263    // This will match URLs not preceded by " ' or = (URLs inside an attribute) or ` (Markdown quoted) or double-scheme (http://http://www.asdf.com)
    264264    // Valid URL characters: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=
    265265    $regex = '@
    266         \b                              # Start with a word-boundary.
    267         (?<!"|\'|=|>|`|[\w-]{2}://)     # Negative look-behind to exclude URLs already in <a> tag, Markdown quoted, or double SCHEME://
    268         (                               # Begin match 1
    269             (                           # Begin match 2
    270                 (?:%s)                  # URL starts with known scheme or www. if strict = false
    271                 [^\s/$.?#]+             # Any domain-valid characters
    272                 [^\s"`<>]{1,%s}         # Match 2 is limited to a maximum of LENGTH valid URL characters
     266        \b                                 # Start with a word-boundary.
     267        (?<!"|\'|=|>|`|\]\(|\[\d\] |[:/]/) # Negative look-behind to exclude URLs already in <a> tag, <tags>between</tags>, `Markdown quoted`, [Markdown](link), [1] www.markdown.footnotes, and avoid broken:/ and doubled://schemes://
     268        (                                  # Begin match 1
     269            (                              # Begin match 2
     270                (?:%s)                     # URL starts with known scheme or www. if strict = false
     271                [^\s/$.?#]+                # Any domain-valid characters
     272                [^\s"`<>]{1,%s}            # Match 2 is limited to a maximum of LENGTH valid URL characters
    273273            )
    274             [^\s"`<>]*                  # Match 1 continues with any further valid URL characters
    275             ([^\P{Any}\s…<>«»"—–%s])    # Final character not a space or common end-of-sentence punctuation (.,:;?!, etc). Using double negation set, see http://stackoverflow.com/a/4786560/277303
     274            [^\s"`<>]*                     # Match 1 continues with any further valid URL characters
     275            ([^\P{Any}\s…<>«»"—–%s])       # Final character not a space or common end-of-sentence punctuation (.,:;?!, etc). Using double negation set, see http://stackoverflow.com/a/4786560/277303
    276276        )
    277277        @Suxi
Note: See TracChangeset for help on using the changeset viewer.