Tag: functions
UTF8 Conversion Functions
by z3n on Apr.24, 2010, under Coding, Tips & Hints
Problem:
Many problems arise when you run across a non-UTF-8 string or, even worse, a mixed variable whose contents may or may not be UTF-8 at the same time.
Solution:
Building on my previous posts, I’ve improved my UTF-8 functions, using this great post as a reference.
// Chunk size, in bytes, used by is_utf8(): strings longer than this are
// validated piece by piece instead of in a single preg_match() call.
define('_is_utf8_split',5000);
/**
 * Encode a value, or every leaf of a (nested) array, with utf8_encode()
 * unless is_utf8() already accepts it.
 *
 * Arrays are rewritten in place through the reference and also returned.
 * A non-array argument is NOT modified through the reference; only the
 * returned value carries the encoded result.
 *
 * @param mixed $x Array (modified in place) or single value.
 * @return mixed   The processed array, or the (possibly encoded) value.
 */
function utf8_encode_array(&$x) {
    // Leaf value: encode only when is_utf8() rejects it.
    if (!is_array($x)) {
        if (is_utf8($x)) {
            return $x;
        }
        return utf8_encode($x);
    }

    // Array: walk every key and store the processed element back in place.
    foreach (array_keys($x) as $key) {
        $x[$key] = utf8_encode_array($x[$key]);
    }
    return $x;
}
/**
 * Convert a string, or every string inside a (nested) array, to UTF-8.
 *
 * The input can have mixed encodings: each leaf is checked on its own with
 * is_utf8() and passed through utf8_encode() only when that check fails.
 * -- 100424
 *
 * @param mixed $x String, array of values, or any other value.
 * @return mixed   The converted copy; the caller's variable is untouched.
 */
function to_utf8($x) { // v1.02
    // Arrays are handled first and element by element. Asking is_utf8()
    // about the array as a whole would skip the conversion of a mixed array
    // as soon as one element happened to be valid UTF-8.
    if (is_array($x)) {
        foreach ($x as $key => $value) {
            $x[$key] = to_utf8($value);
        }
        return $x;
    }

    // null, integers, floats and booleans have a pure-ASCII string form,
    // so there is nothing to convert; return them as they are.
    if ($x === null || (is_scalar($x) && !is_string($x))) {
        return $x;
    }

    if (!is_utf8($x)) {
        $x = utf8_encode($x);
    }
    return $x;
}
/**
 * Tell whether a value is valid UTF-8.
 *
 * - Array: returns true as soon as ANY element is valid UTF-8 and false
 *   when none is (an empty array therefore yields false). Callers that need
 *   "every element is valid" must test the elements themselves.
 * - String up to _is_utf8_split bytes: matched in one pass against the W3C
 *   pattern below.
 * - Longer string: matched chunk by chunk so a single preg_match() call
 *   never sees more than roughly _is_utf8_split bytes.
 *
 * @param mixed $string Value to test.
 * @return bool|int     true/false for arrays and chunked strings; the raw
 *                      preg_match() result (1, 0, or false on a PCRE error)
 *                      for short strings.
 */
function is_utf8($string) { // v1.04
    if (is_array($string)) {
        foreach ($string as $v) {
            // Test the element itself; testing $string here would recurse
            // on the same array forever.
            if (is_utf8($v))
                return true;
        }
        return false;
    }

    // From http://w3.org/International/questions/qa-forms-utf-8.html
    $pattern = '%^(?:
        [\x09\x0A\x0D\x20-\x7E] # ASCII
        | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
        | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
        | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
        | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
        | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
        )*$%xs';

    $length = strlen($string);
    if ($length <= _is_utf8_split) {
        return preg_match($pattern, $string);
    }

    // Based on: http://mobile-website.mobi/php-utf8-vs-iso-8859-1-59
    // Long input is validated in chunks. A chunk is never shorter than
    // 4 bytes so that the boundary adjustment below always leaves at least
    // one byte and the loop is guaranteed to advance.
    $bytes = (string) $string;
    $chunk = max(4, (int) _is_utf8_split);
    for ($start = 0; $start < $length; $start = $end) {
        $end = min($start + $chunk, $length);
        // Do not cut through a multi-byte character: while the first byte
        // of the NEXT chunk is a continuation byte (10xxxxxx), move the cut
        // back. A valid sequence has at most 3 continuation bytes, so three
        // steps are enough; anything still misaligned is invalid and will be
        // rejected by the pattern.
        for ($back = 0; $back < 3 && $end < $length && (ord($bytes[$end]) & 0xC0) === 0x80; $back++) {
            $end--;
        }
        if (!preg_match($pattern, substr($bytes, $start, $end - $start))) {
            return false;
        }
    }
    return true;
}
/**
 * Print a value as JSON after passing it through utf8_encode_array().
 *
 * @param mixed $x Value or array to output.
 * @return void    The JSON text is written with echo.
 */
function _r_json($x) {
    // $x is a local copy, so the in-place encoding does not reach the caller.
    echo json_encode(utf8_encode_array($x));
}