Changeset 722


Ignore:
Timestamp:
May 3, 2020 9:58:17 PM (4 years ago)
Author:
anonymous
Message:

Refactor URLSlug() and cleanFileName(). Add simplifyAccents().

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/FormValidator.inc.php

    r685 r722  
    6767        'notice' => ' sc-msg-notice ',
    6868        'success' => ' sc-msg-success ',
     69        'use_raise_msg' => false,
    6970    );
    7071
     
    151152    public function addError($form_name, $msg='', $type=MSG_ERR, $file=null, $line=null)
    152153    {
     154        $app =& App::getInstance();
     155
     156        if (true === $this->getParam('use_raise_msg')) {
     157            $app->raiseMsg($msg, $type, $file, $line);
     158        }
     159
    153160        $this->errors[] = array(
    154161            'name' => $form_name,
  • trunk/lib/Utilities.inc.php

    r718 r722  
    520520}
    521521
    522 /*
    523 * Converts a string into a URL-safe slug, removing spaces and non word characters.
    524 *
    525 * @access   public
    526 * @param    string  $str    String to convert.
    527 * @return   string          URL-safe slug.
    528 * @author   Quinn Comendant <quinn@strangecode.com>
    529 * @version  1.0
    530 * @since    18 Aug 2014 12:54:29
    531 */
    532 function URLSlug($str)
    533 {
    534     $slug = preg_replace(array('/\W+/u', '/^-+|-+$/u'), array('-', ''), $str);
    535     $slug = strtolower($slug);
    536     return $slug;
    537 }
    538 
    539522/**
    540523 * Return a human readable disk space measurement. Input value measured in bytes.
     
    623606}
    624607
    625 /**
    626  * Removes non-latin characters from file name, using htmlentities to convert known weirdos into regular squares.
     608/*
     609* Converts accented and other special characters into plain ASCII by encoding the string with htmlentities() and then rewriting the resulting entity names with an ordered list of regex rules. If a character does not have a specific rule, it will remain as its entity name without the `&` and `;`, e.g., `5¢` becomes `5&cent;` which becomes `5cent`.
     610* The character set handed to htmlentities() is read from the App `character_set` parameter.
     611* @access   public
     612* @param    string  $str    Input string of text containing accents.
     613* @return   string          String with accented characters converted to ASCII equivalents.
     614* @author   Quinn Comendant <quinn@strangecode.com>
     615* @since    30 Apr 2020 21:29:16
     616*/
     617function simplifyAccents($str)
     618{
     619    $app =& App::getInstance(); // Only used below to look up the 'character_set' parameter.
     620
     621    return preg_replace([
     622        '/&amp;(?=[\w\d#]{1,10};)/ui', // An `&amp;` directly followed by an entity-like token (1-10 word/# chars then `;`) is restored to `&`, so entities already present in the input are handled by the rules below.
     623        '/&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml|caron);/ui', // Accent and ligature entities keep only their leading 1-2 letters, e.g. `&eacute;` becomes `e` and `&aelig;` becomes `ae`.
     624        '/&(?:ndash|mdash|horbar);/ui', // Dash entities become a plain hyphen.
     625        '/&(?:nbsp);/ui', // Non-breaking space becomes a regular space.
     626        '/&(?:bdquo|ldquo|ldquor|lsquo|lsquor|rdquo|rdquor|rsquo|rsquor|sbquo|lsaquo|rsaquo);/ui', // Typographic quote entities are removed entirely.
     627        '/&(?:amp);/ui', // This replacement must come after matching all other entities.
     628        '/[&;]+/u', // Any `&` or `;` still left over is stripped, which leaves unmatched entities as their bare name.
     629    ], [
     630        '&',
     631        '$1',
     632        '-',
     633        ' ',
     634        '',
     635        'and',
     636        '',
     637    ], htmlentities($str, ENT_NOQUOTES | ENT_IGNORE, $app->getParam('character_set')));
     638}
     639
     640/*
     641* Converts a string into a URL-safe slug, removing spaces and non word characters.
     642*
     643* @access   public
     644* @param    string  $str    String to convert.
     645* @return   string          URL-safe slug.
     646* @author   Quinn Comendant <quinn@strangecode.com>
     647* @version  1.0
     648* @since    18 Aug 2014 12:54:29
     649*/
     650function URLSlug($str)
     651{
     652    return strtolower(urlencode(preg_replace(['/[-\s–—.:;?!@#=+_\/\\\]+|(?:&nbsp;|&#160;|&ndash;|&#8211;|&mdash;|&#8212;|%c2%a0|%e2%80%93|%e2%80%9)+/u', '/-+/u', '/[^\w-]+/u', '/^-+|-+$/u'], ['-', '-', '', ''], simplifyAccents($str))));
     653}
     654
     655/**
     656 * Converts a string of text into a safe file name by removing non-ASCII characters and non-word characters.
    627657 *
    628658 * @access  public
     
    634664    $app =& App::getInstance();
    635665
    636     $file_name = preg_replace(array(
    637         '/&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml|caron);/ui',
    638         '/&(?:amp);/ui',
    639         '/[&;]+/u',
    640         '/[^a-zA-Z0-9()@._=+-]+/u',
    641         '/^_+|_+$/u'
    642     ), array(
    643         '$1',
    644         'and',
    645         '',
    646         '_',
    647         ''
    648     ), htmlentities($file_name, ENT_NOQUOTES | ENT_IGNORE, $app->getParam('character_set')));
     666    $file_name = preg_replace(['/[^a-zA-Z0-9()@._=+-]+/u', '/^_+|_+$/u'], ['_', ''], simplifyAccents($file_name));
    649667    return mb_substr($file_name, 0, 250);
    650668}
  • trunk/tests/UtilitiesTest.php

    r653 r722  
    283283        }
    284284    }
     285
     286    function test_URLSlug()
     287    {
     288        $strings = [
     289            'This becomes a slug' => 'this-becomes-a-slug',
     290            'http://䟋子.卷筒纞/?x=y&1=2#asdf' => 'http-%e4%be%8b%e5%ad%90-%e5%8d%b7%e7%ad%92%e7%ba%b8-x-yand1-2-asdf',
     291            'Ä À Ö ö Ü ÃŒ ß a Æ Ê Ø Þ Ã
     292 Ã¥' => 'a-a-o-o-u-u-sz-a-ae-ae-o-o-a-a',
     293            ' - ' => '',
     294            '' => '',
     295            ' ' => '',
     296            '?' => '',
     297            'a file.JPG' => 'a-file-jpg',
     298            '/a/path/file.JPG' => 'a-path-file-jpg',
     299            '/a/directory/' => 'a-directory',
     300            '/' => '',
     301            'this-is-already-a-slug' => 'this-is-already-a-slug',
     302            'HTML entities&nbsp;ok&mdash;or not!' => 'html-entities-ok-or-not',
     303            '<p>tags
</p>' => 'ltpgttagshelliplt-pgt',
     304            'Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.' => 'lorem-ipsum-dolor-sit-amet-consetetur-sadipscing-elitr-sed-diam-nonumy-eirmod-tempor-invidunt-ut-labore-et-dolore-magna-aliquyam-erat-sed-diam-voluptua-at-vero-eos-et-accusam-et-justo-duo-dolores-et-ea-rebum-stet-clita-kasd-gubergren-no-sea-takimata-sanctus-est-lorem-ipsum-dolor-sit-amet-lorem-ipsum-dolor-sit-amet-consetetur-sadipscing-elitr-sed-diam-nonumy-eirmod-tempor-invidunt-ut-labore-et-dolore-magna-aliquyam-erat-sed-diam-voluptua-at-vero-eos-et-accusam-et-justo-duo-dolores-et-ea-rebum-stet-clita-kasd-gubergren-no-sea-takimata-sanctus-est-lorem-ipsum-dolor-sit-amet-lorem-ipsum-dolor-sit-amet-consetetur-sadipscing-elitr-sed-diam-nonumy-eirmod-tempor-invidunt-ut-labore-et-dolore-magna-aliquyam-erat-sed-diam-voluptua-at-vero-eos-et-accusam-et-justo-duo-dolores-et-ea-rebum-stet-clita-kasd-gubergren-no-sea-takimata-sanctus-est-lorem-ipsum-dolor-sit-amet',
     305            // Binary data.
     306            'xÔ‡-]ɯpı
     307o˜;ǯ)0ñ]à
     308fhH¡Êg+£KˇH
     309˙n*ƃ
     310‡Í˜ÌQÒõ.◊ulÌ≠˘yÁ˜ïu∫
     311ݢ>kƒ∑Êì+¯∞˛ ‘E
     312¬±Âh”U÷%>≈fifi≀1
     313' => 'xodagger-emacrp%c4%b1-otildecmacr0nafhhiexcleg-poundk%cb%87h-nedaggeritildeuqoo-lozulineyatildeiuintcentgtkfnofsumaei-macrinfin-enotplusmnahudividegtasymp%ef%ac%81%ef%ac%81le1',
     314            '-- repeated - characters -- so---extra - - - look-out!' => 'repeated-characters-so-extra-look-out',
     315            'NesthÀkchen\'s Teenage Years' => 'nesthakchens-teenage-years',
     316            'ገχαρΜεῖς Akharneîs' => '%e1%bc%88chialpharhonuepsilon%e1%bf%96sigmaf-akharneis',
     317            'АМтПМ ПавлПвОч ЧеÑ
     318Пв' => '%d0%90%d0%bd%d1%82%d0%be%d0%bd-%d0%9f%d0%b0%d0%b2%d0%bb%d0%be%d0%b2%d0%b8%d1%87-%d0%a7%d0%b5%d1%85%d0%be%d0%b2',
     319            'Demain dÚs l\'aube' => 'demain-des-laube',
     320            'Demain dÚs l’aube' => 'demain-des-laube',
     321        ];
     322        foreach ($strings as $input => $expected) {
     323            // printf("%s\n", URLSlug($input));
     324            $result = URLSlug($input);
     325            $this->assertEquals($expected, $result, sprintf('Failed with input: %s', $input));
     326        }
     327    }
    285328}
Note: See TracChangeset for help on using the changeset viewer.