Extends
lithium\core\Adaptable
Checks if a given string is UTF-8 encoded and is valid UTF-8.
In _quick_ mode it only checks whether non-ASCII characters are present,
which indicates that some multibyte encoding is in use. Don't use quick mode
to validate the integrity of UTF-8 encoded strings.
Parameters
- string $string The string to analyze.
- array $options Allows toggling the mode via the `'quick'` option, which defaults to `false`.
Returns
boolean Returns `true` if the string is UTF-8.
Source
/**
 * Checks whether a string is made up entirely of well-formed UTF-8 sequences.
 *
 * In full mode (the default) the whole string must consist of tab, LF, CR,
 * printable ASCII (0x20-0x7E) and well-formed 2-, 3- and 4-byte sequences.
 * Overlong encodings, UTF-16 surrogates and code points above U+10FFFF are
 * rejected, as are other ASCII control bytes. An empty string passes.
 *
 * In quick mode the string is only searched for a single byte outside tab,
 * LF, CR and printable ASCII. A hit merely hints at a multibyte encoding, so
 * quick mode must not be used to validate UTF-8 integrity.
 *
 * @param string $string The string to analyze.
 * @param array $options Supported keys:
 *        - `'quick'` _boolean_: Enables quick mode, defaults to `false`.
 * @return boolean `true` if the check for the selected mode matches. A PCRE
 *         failure (`preg_match()` returning `false`) yields `false`.
 */
public static function is($string, array $options = array()) {
	$defaults = array('quick' => false);
	$options += $defaults;

	if ($options['quick']) {
		// Any byte that is not tab, LF, CR or printable ASCII.
		return preg_match('/[^\x09\x0A\x0D\x20-\x7E]/', $string) === 1;
	}

	$sequences = array(
		'[\x09\x0A\x0D\x20-\x7E]',             // ASCII
		'[\xC2-\xDF][\x80-\xBF]',              // non-overlong 2-byte
		'\xE0[\xA0-\xBF][\x80-\xBF]',          // excluding overlongs
		'[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}',   // straight 3-byte
		'\xED[\x80-\x9F][\x80-\xBF]',          // excluding surrogates
		'\xF0[\x90-\xBF][\x80-\xBF]{2}',       // planes 1-3
		'[\xF1-\xF3][\x80-\xBF]{3}',           // planes 4-15
		'\xF4[\x80-\x8F][\x80-\xBF]{2}'        // plane 16
	);

	// The alternatives are mutually exclusive by their lead byte, so
	// backtracking can never find a different match. A non-capturing group
	// with a possessive quantifier therefore gives the same result while
	// sparing PCRE the per-iteration capture and backtracking state.
	$regex = '/\A(?:' . implode('|', $sequences) . ')*+\z/';

	// preg_match() returns 1, 0 or false; only 1 counts as a match.
	return preg_match($regex, $string) === 1;
}