php unicode 中文互转
2022-03-24 PHP 4967
/**
 * Encode every character of a string as "<prefix><decimal code point><postfix>".
 *
 * The input is converted to big-endian UCS-4 so that the numeric value of each
 * character does not depend on the byte order the local iconv picks for the
 * unqualified "UCS-2" name, and so that characters outside the Basic
 * Multilingual Plane are encoded as a single code point.
 *
 * @param string $str      Source text.
 * @param string $encoding Character encoding of $str (default "GBK").
 * @param string $prefix   Text emitted before each number (default "&#").
 * @param string $postfix  Text emitted after each number (default ";").
 *
 * @return string The encoded text; an empty string when $str is empty or
 *                cannot be converted from $encoding.
 */
function unicode_encode($str, $encoding = 'GBK', $prefix = '&#', $postfix = ';')
{
    $ucs4 = iconv($encoding, 'UCS-4BE', $str);
    if ($ucs4 === false || $ucs4 === '') {
        // Nothing to encode (empty input) or the input is not valid $encoding.
        return '';
    }

    $unistr = '';
    // 'N*' reads consecutive unsigned 32-bit big-endian integers: one per character.
    foreach (unpack('N*', $ucs4) as $codePoint) {
        $unistr .= $prefix . $codePoint . $postfix;
    }

    return $unistr;
}

/**
 * Decode a string of "<prefix><decimal code point><postfix>" entities.
 *
 * Only the entities are decoded: anything before the first prefix, and any
 * text following the digits of an entity, is discarded.
 *
 * @param string $unistr   Encoded text, e.g. "&#20320;&#22909;".
 * @param string $encoding Character encoding of the returned string (default "utf-8").
 * @param string $prefix   Text that precedes each number (default "&#").
 * @param string $postfix  Text that follows each number (default ";").
 *
 * @return string|false The decoded text, or false when iconv cannot convert
 *                      the result to $encoding.
 */
function unicode_decode($unistr, $encoding = 'utf-8', $prefix = '&#', $postfix = ';')
{
    $segments = explode($prefix, $unistr);
    $postfixLength = strlen($postfix);
    $ucs4 = '';

    // Index 0 is whatever precedes the first prefix; it holds no entity.
    for ($i = 1, $len = count($segments); $i < $len; $i++) {
        $segment = $segments[$i];

        // Strip the postfix only when it is really there; chopping blindly
        // would eat the last digit of an entity that lacks its postfix.
        if ($postfixLength > 0 && substr($segment, -$postfixLength) === $postfix) {
            $segment = substr($segment, 0, strlen($segment) - $postfixLength);
        }

        $codePoint = intval($segment);

        // Values that are not Unicode scalar values cannot be converted and
        // would make iconv reject the whole buffer, so they are skipped.
        if ($codePoint < 0 || $codePoint > 0x10FFFF || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
            continue;
        }

        $ucs4 .= pack('N', $codePoint);
    }

    return iconv('UCS-4BE', $encoding, $ucs4);
}

/**
 * Convert GBK text to a "\u<decimal>" sequence.
 *
 * Note that the number after "\u" is decimal, not the four hex digits used by
 * JSON/JavaScript escapes; unicode2zh() reads the same format.
 *
 * @param string $str GBK-encoded text.
 *
 * @return string
 */
function zh2unicode($str)
{
    return unicode_encode($str, 'GBK', "\\u", '');
}

/**
 * Convert a "\u<decimal>" sequence (as produced by zh2unicode()) to GBK text.
 *
 * @param string $code Encoded text.
 *
 * @return string|false GBK-encoded text, or false when the conversion fails.
 */
function unicode2zh($code)
{
    return unicode_decode($code, 'GBK', "\\u", '');
}

// Usage example. NOTE(review): zh2unicode() reads its argument as GBK, so the
// literal below only yields the code points of the two characters when this
// file itself is saved as GBK - confirm the file encoding.
$str = '你好';
echo zh2unicode($str);
echo unicode2zh("\u60\u98\u62\u28003\u29362\u12477\u60\u47\u98\u62");

/**
 * Decode JSON-style escapes (such as "\u4f60") by parsing the text as the
 * content of a JSON string.
 *
 * The text is embedded verbatim between double quotes, so it must already be
 * valid JSON string content: an unescaped double quote or a stray backslash
 * makes the parse fail.
 *
 * @param string $unicode_str Text containing JSON escapes.
 *
 * @return string The decoded text, or an empty string when parsing fails.
 */
function unicodeDecode($unicode_str)
{
    $decoded = json_decode('{"str":"' . $unicode_str . '"}', true);

    // Guard against parse failures and against input that injected JSON
    // structure and turned "str" into something other than a string.
    if (!is_array($decoded) || !isset($decoded['str']) || !is_string($decoded['str'])) {
        return '';
    }

    return $decoded['str'];
}

/**
 * Encode every character of a UTF-8 string as a decimal HTML numeric
 * character reference ("&#NNNN;").
 *
 * @param string $str UTF-8 text.
 *
 * @return string The encoded text; an empty string when $str is empty or is
 *                not valid UTF-8.
 */
function UnicodeEncode($str)
{
    // 'u' splits on UTF-8 characters; 's' lets '.' match newlines as well so
    // that line breaks are encoded instead of being dropped.
    if (!preg_match_all('/./us', $str, $matches)) {
        return '';
    }

    $unicodeStr = '';
    foreach ($matches[0] as $char) {
        $codePoint = unpack('N', iconv('UTF-8', 'UCS-4BE', $char));
        // The trailing ';' terminates the reference, as required by HTML and
        // by the decoders in this file.
        $unicodeStr .= '&#' . $codePoint[1] . ';';
    }

    return $unicodeStr;
}

/**
 * Decode percent-encoding, "%uXXXX" escapes and HTML numeric character
 * references ("&#xHHHH;" and "&#NNNN;") into UTF-8.
 *
 * All other text, including newlines, is passed through unchanged. References
 * that do not denote a valid Unicode code point are left as they are.
 *
 * @param string $str Escaped text.
 *
 * @return string UTF-8 text.
 */
function unescape($str)
{
    $str = rawurldecode($str);

    return preg_replace_callback(
        '/((?:%u[0-9a-fA-F]{4})+)|&#x([0-9a-fA-F]+);|&#(\d+);/',
        function ($match) {
            if ($match[1] !== '') {
                // A run of %uXXXX escapes is a sequence of UTF-16 code units;
                // decoding the run as a whole keeps surrogate pairs together.
                $utf16 = pack('H*', str_replace('%u', '', $match[1]));

                return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16BE');
            }

            // Group 3 is only present for the decimal form.
            $codePoint = isset($match[3]) ? (int) $match[3] : hexdec($match[2]);
            if ($codePoint > 0x10FFFF || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
                return $match[0];
            }

            return mb_convert_encoding(pack('N', (int) $codePoint), 'UTF-8', 'UCS-4BE');
        },
        $str
    );
}
很赞哦! (0)
相关文章
文章评论
-
-
-
0条评论