hvordan gør jeg egentlig? :) for jeg kender kun utf8_decode
function utf162utf8($utf16)
{
// oh please oh please oh please oh please oh please
if(function_exists('mb_convert_encoding')) {
return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
}
$bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
switch(true) {
case ((0x7F & $bytes) == $bytes):
// this case should never be reached, because we are in ASCII range
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr(0x7F & $bytes);
case (0x07FF & $bytes) == $bytes:
// return a 2-byte UTF-8 character
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr(0xC0 | (($bytes >> 6) & 0x1F))
. chr(0x80 | ($bytes & 0x3F));
case (0xFFFF & $bytes) == $bytes:
// return a 3-byte UTF-8 character
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr(0xE0 | (($bytes >> 12) & 0x0F))
. chr(0x80 | (($bytes >> 6) & 0x3F))
. chr(0x80 | ($bytes & 0x3F));
}
// ignoring UTF-32 for now, sorry
return '';
}
/**
* convert a string from one UTF-8 char to one UTF-16 char
*
* Normally should be handled by mb_convert_encoding, but
* provides a slower PHP-only method for installations
* that lack the multibye string extension.
*
* @param string $utf8 UTF-8 character
* @return string UTF-16 character
* @access private
*/
function utf82utf16($utf8)
{
// oh please oh please oh please oh please oh please
if(function_exists('mb_convert_encoding')) {
return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
}
switch(strlen($utf8)) {
case 1:
// this case should never be reached, because we are in ASCII range
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return $utf8;
case 2:
// return a UTF-16 character from a 2-byte UTF-8 char
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr(0x07 & (ord($utf8{0}) >> 2))
. chr((0xC0 & (ord($utf8{0}) << 6))
| (0x3F & ord($utf8{1})));
case 3:
// return a UTF-16 character from a 3-byte UTF-8 char
// see:
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr((0xF0 & (ord($utf8{0}) << 4))
| (0x0F & (ord($utf8{1}) >> 2)))
. chr((0xC0 & (ord($utf8{1}) << 6))
| (0x7F & ord($utf8{2})));
}
// ignoring UTF-32 for now, sorry
return '';
}
/**
* encodes an arbitrary variable into JSON format
*
* @param mixed $var any number, boolean, string, array, or object to be encoded.
* see argument 1 to Services_JSON() above for array-parsing behavior.
* if var is a strng, note that encode() always expects it
* to be in ASCII or UTF-8 format!
*
* @return mixed JSON string representation of input var or an error if a problem occurs
* @access public
*/
function encode($var)
{
switch (gettype($var)) {
case 'boolean':
return $var ? 'true' : 'false';
case 'NULL':
return 'null';
case 'integer':
return (int) $var;
case 'double':
case 'float':
return (float) $var;
case 'string':
// STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
$ascii = '';
$strlen_var = strlen($var);
/*
* Iterate over every character in the string,
* escaping with a slash or encoding to UTF-8 where necessary
*/
for ($c = 0; $c < $strlen_var; ++$c) {
$ord_var_c = ord($var{$c});
switch (true) {
case $ord_var_c == 0x08:
$ascii .= '\b';
break;
case $ord_var_c == 0x09:
$ascii .= '\t';
break;
case $ord_var_c == 0x0A:
$ascii .= '\n';
break;
case $ord_var_c == 0x0C:
$ascii .= '\f';
break;
case $ord_var_c == 0x0D:
$ascii .= '\r';
break;
case $ord_var_c == 0x22:
case $ord_var_c == 0x2F:
case $ord_var_c == 0x5C:
// double quote, slash, slosh
$ascii .= '\\'.$var{$c};
break;
case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
// characters U-00000000 - U-0000007F (same as ASCII)
$ascii .= $var{$c};
break;
case (($ord_var_c & 0xE0) == 0xC0):
// characters U-00000080 - U-000007FF, mask 110XXXXX
// see
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1}));
$c += 1;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xF0) == 0xE0):
// characters U-00000800 - U-0000FFFF, mask 1110XXXX
// see
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}));
$c += 2;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xF8) == 0xF0):
// characters U-00010000 - U-001FFFFF, mask 11110XXX
// see
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}));
$c += 3;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xFC) == 0xF8):
// characters U-00200000 - U-03FFFFFF, mask 111110XX
// see
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}),
ord($var{$c + 4}));
$c += 4;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xFE) == 0xFC):
// characters U-04000000 - U-7FFFFFFF, mask 1111110X
// see
http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}),
ord($var{$c + 4}),
ord($var{$c + 5}));
$c += 5;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
}
}
return '"'.$ascii.'"';
det er sgu meget kode for den smule er det ikke?