123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- <?php
- namespace dokuwiki\Utf8;
/**
 * Helpers for detecting and isolating Asian "words" in UTF-8 text.
 *
 * Everything here is driven by the REGEXP constant, which matches exactly one
 * "word unit": a single character from the listed ranges, or a kana character
 * optionally followed by one modifier character.
 */
class Asian
{
    /**
     * Non-capturing regular expression group matching one Asian word unit.
     *
     * The fragment has no delimiters or flags; callers wrap it themselves and
     * must use the /u modifier because it relies on \x{...} code points and on
     * raw multi-byte UTF-8 sequences inside a character class.
     */
    const REGEXP =
        '(?:' .

        // alternative 1: a single character from U+0E00-U+0E7F (Thai block)
        '[\x{0E00}-\x{0E7F}]' .

        '|' .

        // alternative 2: a single character from one of these ranges
        '[' .
        '\x{2E80}-\x{3040}' .
        '\x{309D}-\x{30A0}' .
        '\x{30FD}-\x{31EF}\x{3200}-\x{D7AF}' .
        '\x{F900}-\x{FAFF}' .
        '\x{FE30}-\x{FE4F}' .
        // The following ranges are written as raw UTF-8 bytes (double-quoted
        // strings) rather than \x{...} escapes.
        "\xF0\xA0\x80\x80-\xF0\xAA\x9B\x9F" . // U+20000-U+2A6DF
        "\xF0\xAA\x9C\x80-\xF0\xAB\x9C\xBF" . // U+2A700-U+2B73F
        "\xF0\xAB\x9D\x80-\xF0\xAB\xA0\x9F" . // U+2B740-U+2B81F
        "\xF0\xAF\xA0\x80-\xF0\xAF\xAB\xBF" . // U+2F800-U+2FAFF
        ']' .

        '|' .

        // alternative 3: one character from the first class ...
        '[' .
        '\x{3042}\x{3044}\x{3046}\x{3048}' .
        '\x{304A}-\x{3062}\x{3064}-\x{3082}' .
        '\x{3084}\x{3086}\x{3088}-\x{308D}' .
        '\x{308F}-\x{3094}' .
        '\x{30A2}\x{30A4}\x{30A6}\x{30A8}' .
        '\x{30AA}-\x{30C2}\x{30C4}-\x{30E2}' .
        '\x{30E4}\x{30E6}\x{30E8}-\x{30ED}' .
        '\x{30EF}-\x{30F4}\x{30F7}-\x{30FA}' .
        // ... optionally followed by one character from the second class
        '][' .
        '\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}' .
        '\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}-\x{309C}' .
        '\x{30A1}\x{30A3}\x{30A5}\x{30A7}\x{30A9}' .
        '\x{30C3}\x{30E3}\x{30E5}\x{30E7}\x{30EE}\x{30F5}\x{30F6}\x{30FB}\x{30FC}' .
        '\x{31F0}-\x{31FF}' .
        ']?' .
        ')';

    /**
     * Check whether the given term contains at least one Asian word unit.
     *
     * @param string $term text to inspect
     * @return bool true if REGEXP matches anywhere in $term; false otherwise,
     *              including when the match itself fails
     */
    public static function isAsianWords($term)
    {
        return (bool)preg_match('/' . self::REGEXP . '/u', $term);
    }

    /**
     * Surround every Asian word unit in the text with the separator.
     *
     * Each match is wrapped individually, so two adjacent matches end up with
     * two separators between them.
     *
     * @param string $text text to process
     * @param string $sep  separator inserted before and after every match;
     *                     always inserted literally
     * @return string the processed text, or the unchanged input if the
     *                regular expression could not be applied
     */
    public static function separateAsianWords($text, $sep = ' ')
    {
        // A callback is used instead of a replacement string so that $sep can
        // never be parsed as replacement syntax: with "$sep . '\1' . $sep" a
        // separator such as "0" turned into the reference "\10", and "$1" or
        // "\1" inside the separator duplicated the matched text.
        $asia = @preg_replace_callback(
            '/' . self::REGEXP . '/u',
            function ($match) use ($sep) {
                return $sep . $match[0] . $sep;
            },
            $text
        );

        // null signals a PCRE failure; keep the original text in that case
        if (!is_null($asia)) {
            $text = $asia;
        }

        return $text;
    }

    /**
     * Split the term into runs of Asian word units and the text between them.
     *
     * Consecutive Asian word units stay together in one part. Empty parts are
     * dropped and the original order is preserved.
     *
     * @param string $term text to split
     * @return string[] the parts in order; if the regular expression cannot be
     *                  applied the whole term is returned as the only part
     *                  (or an empty array for an empty term)
     */
    public static function splitAsianWords($term)
    {
        $parts = preg_split(
            '/(' . self::REGEXP . '+)/u',
            $term,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        if ($parts === false) {
            // PCRE failure: hand back the unsplit term so callers always get
            // an array, matching what PREG_SPLIT_NO_EMPTY yields for ''.
            return ((string)$term === '') ? array() : array($term);
        }

        return $parts;
    }
}
|