gpt4 book ai didi

php - JSON_BIGINT_AS_STRING 向后移植

转载 作者:行者123 更新时间:2023-12-02 03:08:36 27 4
gpt4 key购买 nike

据我了解,JSON_BIGINT_AS_STRING 仅适用于 5.4+。我需要在 5.1+ 环境中重现相同的行为。该代码是分布式的,我无法控制它将运行的环境。我有哪些选择(除了编写自己的 json 解析器)?有没有成熟且经过充分测试的解决方案?

最佳答案

这个问题相当棘手,不过下面是一个修改版的 Services_JSON。我已经使用了一段时间,效果很好:

大多数基于正则表达式的解决方案都无法通过下面这个测试:

// Demonstrates why a regex pre-pass is not enough: the pattern only quotes
// digits that directly follow a "key": prefix, so the bare array element below
// is left untouched and json_decode() still converts it to a float.
$json = "[999999999999999999]"; 
var_dump(json_decode(preg_replace('/("\w+"):(\d+)/', '\\1:"\\2"', $json)));

输出

array (size=1)
0 => float 1.0E+18

使用 ServicesJSON 类

// Decode the same input with the ServicesJSON class defined in this file;
// its decode() returns numeric literals as strings, so no precision is lost.
$json = new ServicesJSON();
$var = $json->decode("[999999999999999999]");
var_dump($var);

输出

array (size=1)
0 => string '999999999999999999' (length=18)

更复杂的 Json

// Nested sample document mixing quoted numbers, bare big integers and arrays
// of both, at several nesting levels.
$data = '{
"glossary": {
"title": "example glossary",
"num1":"343434343434343434343",
"num2":1112222333344455566,
"num3":[223232331321323,"23232343545454",3434343434334],
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"],
"num1":"343434343434343434343",
"num2":1112222333344455566,
"num3":[223232331321323,"23232343545454",3434343434334]
},
"GlossSee": "markup",
"num1":"343434343434343434343",
"num2":1112222333344455566,
"num3":[223232331321323,"23232343545454",3434343434334],
"num4":[999999999999999999]
}
}
}
}
}';


// NOTE(review): ServicesJSON::decode() is declared with a single parameter,
// so the second argument (128) is ignored here.
var_dump($json->decode($data,128));

输出

object(stdClass)[2]
public 'glossary' =>
object(stdClass)[3]
public 'title' => string 'example glossary' (length=16)
public 'num1' => string '343434343434343434343' (length=21)
public 'num2' => string '1112222333344455566' (length=19)
public 'num3' =>
array (size=3)
0 => string '223232331321323' (length=15)
1 => string '23232343545454' (length=14)
2 => string '3434343434334' (length=13)
public 'GlossDiv' =>
object(stdClass)[4]
public 'title' => string 'S' (length=1)
public 'GlossList' =>
object(stdClass)[5]
public 'GlossEntry' =>
object(stdClass)[6]
public 'ID' => string 'SGML' (length=4)
public 'SortAs' => string 'SGML' (length=4)
public 'GlossTerm' => string 'Standard Generalized Markup Language' (length=36)
public 'Acronym' => string 'SGML' (length=4)
public 'Abbrev' => string 'ISO 8879:1986' (length=13)
public 'GlossDef' =>
object(stdClass)[7]
...
public 'GlossSee' => string 'markup' (length=6)
public 'num1' => string '343434343434343434343' (length=21)
public 'num2' => string '1112222333344455566' (length=19)
public 'num3' =>
array (size=3)
...
public 'num4' =>
array (size=1)
...

修改类

// Constants used by ServicesJSON.
//
// Each define() is guarded with defined() so that including this file twice,
// or loading it next to another library that already declares the same
// constant names, does not raise a "constant already defined" diagnostic.

// Parser-state markers stored in the 'what' field of decode()'s stack entries.
defined('SERVICES_JSON_SLICE') || define('SERVICES_JSON_SLICE', 1);
defined('SERVICES_JSON_IN_STR') || define('SERVICES_JSON_IN_STR', 2);
defined('SERVICES_JSON_IN_ARR') || define('SERVICES_JSON_IN_ARR', 3);
defined('SERVICES_JSON_IN_OBJ') || define('SERVICES_JSON_IN_OBJ', 4);
defined('SERVICES_JSON_IN_CMT') || define('SERVICES_JSON_IN_CMT', 5);

// Behaviour flags for the ServicesJSON constructor (combine with bitwise OR).
// LOOSE_TYPE: decode JSON objects into associative arrays instead of stdClass.
defined('SERVICES_JSON_LOOSE_TYPE') || define('SERVICES_JSON_LOOSE_TYPE', 16);
// SUPPRESS_ERRORS: encode unsupported types as 'null' instead of an error object.
defined('SERVICES_JSON_SUPPRESS_ERRORS') || define('SERVICES_JSON_SUPPRESS_ERRORS', 32);

/**
 * Pure-PHP JSON encoder/decoder whose decoder returns every numeric literal
 * as a string, so integers too large for a native int keep all their digits.
 *
 * Written to run on old PHP 5 releases as well as current ones: it uses
 * square-bracket string offsets, declares its properties and calls its static
 * helper statically.
 */
class ServicesJSON
{
    /**
     * Bitmask of SERVICES_JSON_* behaviour flags passed to the constructor.
     *
     * @var int
     */
    public $use = 0;

    /**
     * @param int $use Bitwise OR of SERVICES_JSON_LOOSE_TYPE (decode objects
     *                 into associative arrays) and SERVICES_JSON_SUPPRESS_ERRORS
     *                 (encode unsupported types as 'null').
     */
    public function __construct($use = 0)
    {
        $this->use = $use;
    }

    /**
     * Convert one big-endian UTF-16 code unit (two bytes) to UTF-8.
     *
     * Delegates to mbstring when available; the manual fallback covers only
     * values up to 0xFFFF and returns '' for anything else.
     *
     * @param string $utf16 Two-byte UTF-16 sequence.
     * @return string UTF-8 encoded character.
     */
    public function utf162utf8($utf16)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        if ((0x7F & $bytes) == $bytes) {
            // ASCII range: a single byte is enough.
            return chr(0x7F & $bytes);
        }

        if ((0x07FF & $bytes) == $bytes) {
            // Two-byte UTF-8 sequence.
            return chr(0xC0 | (($bytes >> 6) & 0x1F))
                . chr(0x80 | ($bytes & 0x3F));
        }

        if ((0xFFFF & $bytes) == $bytes) {
            // Three-byte UTF-8 sequence.
            return chr(0xE0 | (($bytes >> 12) & 0x0F))
                . chr(0x80 | (($bytes >> 6) & 0x3F))
                . chr(0x80 | ($bytes & 0x3F));
        }

        // Values outside the 16-bit range are not handled by the fallback.
        return '';
    }

    /**
     * Convert one UTF-8 character to big-endian UTF-16.
     *
     * Delegates to mbstring when available; the manual fallback handles
     * sequences of one to three bytes and returns '' for longer ones.
     *
     * @param string $utf8 A single UTF-8 encoded character.
     * @return string UTF-16 bytes.
     */
    public function utf82utf16($utf8)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
        }

        switch (strlen($utf8)) {
            case 1:
                // ASCII: returned unchanged.
                return $utf8;

            case 2:
                return chr(0x07 & (ord($utf8[0]) >> 2))
                    . chr((0xC0 & (ord($utf8[0]) << 6))
                        | (0x3F & ord($utf8[1])));

            case 3:
                return chr((0xF0 & (ord($utf8[0]) << 4))
                        | (0x0F & (ord($utf8[1]) >> 2)))
                    . chr((0xC0 & (ord($utf8[1]) << 6))
                        | (0x7F & ord($utf8[2])));
        }

        // Longer sequences are not handled by the fallback.
        return '';
    }

    /**
     * Encode a PHP value as JSON.
     *
     * @param mixed $var Value to encode; strings are expected to be ASCII or UTF-8.
     * @return mixed JSON text for strings, booleans, null, arrays and objects;
     *               the int/float value itself for numbers; for unsupported
     *               types either 'null' (SERVICES_JSON_SUPPRESS_ERRORS set) or
     *               a ServicesJSON_Error instance.
     */
    public function encode($var)
    {
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return (int) $var;

            case 'double':
            case 'float':
                return (float) $var;

            case 'string':
                return $this->encode_string($var);

            case 'array':
                // A non-empty array whose keys are not exactly 0..n-1 becomes a JSON object.
                if (is_array($var) && count($var) && (array_keys($var) !== range(0, sizeof($var) - 1))) {
                    return $this->encode_properties($var);
                }

                $elements = array_map(array($this, 'encode'), $var);

                foreach ($elements as $element) {
                    if (self::isError($element)) {
                        return $element;
                    }
                }

                return '[' . join(',', $elements) . ']';

            case 'object':
                return $this->encode_properties(get_object_vars($var));

            default:
                return ($this->use & SERVICES_JSON_SUPPRESS_ERRORS)
                    ? 'null'
                    : new ServicesJSON_Error(gettype($var) . " can not be encoded as JSON string");
        }
    }

    /**
     * Encode a name => value map as a JSON object.
     *
     * @param array $vars Property names mapped to values.
     * @return mixed JSON object text, or the first error produced by a member.
     */
    private function encode_properties($vars)
    {
        $properties = array_map(
            array($this, 'name_value'),
            array_keys($vars),
            array_values($vars)
        );

        foreach ($properties as $property) {
            if (self::isError($property)) {
                return $property;
            }
        }

        return '{' . join(',', $properties) . '}';
    }

    /**
     * Encode a string as a quoted JSON string literal.
     *
     * Printable ASCII is copied, the usual short escapes are used where they
     * exist, remaining control characters become \u00XX, and multi-byte UTF-8
     * sequences become \uXXXX escapes. Bytes that do not start a UTF-8
     * sequence (stray continuation bytes, 0xFE, 0xFF) are skipped.
     *
     * @param string $var ASCII or UTF-8 text.
     * @return string JSON string literal including the surrounding quotes.
     */
    private function encode_string($var)
    {
        $short = array(
            0x08 => '\b',
            0x09 => '\t',
            0x0A => '\n',
            0x0C => '\f',
            0x0D => '\r',
        );

        $ascii = '';
        $length = strlen($var);

        for ($c = 0; $c < $length; ++$c) {
            $ord = ord($var[$c]);

            if (isset($short[$ord])) {
                $ascii .= $short[$ord];
            } elseif ($ord == 0x22 || $ord == 0x2F || $ord == 0x5C) {
                // Double quote, slash and backslash are backslash-escaped.
                $ascii .= '\\' . $var[$c];
            } elseif ($ord < 0x20) {
                // Control characters without a short escape must not be
                // dropped: emit them as \u00XX.
                $ascii .= sprintf('\u%04x', $ord);
            } elseif ($ord <= 0x7F) {
                $ascii .= $var[$c];
            } else {
                $len = self::utf8_sequence_length($ord);

                if ($len > 1) {
                    // substr() never reads past the end of a truncated sequence.
                    $ascii .= $this->unicode_escape(substr($var, $c, $len));
                    $c += $len - 1;
                }
            }
        }

        return '"' . $ascii . '"';
    }

    /**
     * Number of bytes in the UTF-8 sequence introduced by a lead byte.
     *
     * @param int $lead Byte value 0-255.
     * @return int 2 to 6 for a multi-byte lead byte, 0 otherwise.
     */
    private static function utf8_sequence_length($lead)
    {
        if (($lead & 0xE0) == 0xC0) {
            return 2;
        }
        if (($lead & 0xF0) == 0xE0) {
            return 3;
        }
        if (($lead & 0xF8) == 0xF0) {
            return 4;
        }
        if (($lead & 0xFC) == 0xF8) {
            return 5;
        }
        if (($lead & 0xFE) == 0xFC) {
            return 6;
        }

        return 0;
    }

    /**
     * Build the \uXXXX escape(s) for one UTF-8 character.
     *
     * When the UTF-16 form is longer than one code unit (a surrogate pair),
     * each 16-bit unit gets its own \u escape.
     *
     * @param string $char A single UTF-8 encoded character.
     * @return string One or more \uXXXX escapes.
     */
    private function unicode_escape($char)
    {
        $hex = bin2hex($this->utf82utf16($char));

        if (strlen($hex) <= 4) {
            return sprintf('\u%04s', $hex);
        }

        return '\u' . implode('\u', str_split($hex, 4));
    }

    /**
     * Encode one "name":value member of a JSON object.
     *
     * @param mixed $name  Property name; converted with strval().
     * @param mixed $value Property value.
     * @return mixed Member text, or the error produced while encoding $value.
     */
    public function name_value($name, $value)
    {
        $encoded_value = $this->encode($value);

        if (self::isError($encoded_value)) {
            return $encoded_value;
        }

        return $this->encode(strval($name)) . ':' . $encoded_value;
    }

    /**
     * Strip leading/trailing comments and surrounding whitespace.
     *
     * @param string $str JSON fragment.
     * @return string Trimmed fragment.
     */
    public function reduce_string($str)
    {
        $str = preg_replace(array(

            // single line comments in '// ...' form
            '#^\s*//(.+)$#m',

            // multi-line comments in '/* ... */' form, at start of string
            '#^\s*/\*(.+)\*/#Us',

            // multi-line comments in '/* ... */' form, at end of string
            '#/\*(.+)\*/\s*$#Us'

        ), '', $str);

        return trim($str);
    }

    /**
     * Decode JSON text into a PHP value.
     *
     * Numeric literals are returned as strings exactly as written, which is
     * what keeps big integers intact. Objects become stdClass instances, or
     * associative arrays when SERVICES_JSON_LOOSE_TYPE is set.
     *
     * @param string $str JSON text.
     * @return mixed Decoded value; null when the input is not recognised.
     */
    public function decode($str)
    {
        $str = $this->reduce_string($str);

        switch (strtolower($str)) {
            case 'true':
                return true;

            case 'false':
                return false;

            case 'null':
                return null;
        }

        if (is_numeric($str)) {
            // Returned verbatim so no precision is lost.
            return $str;
        }

        $m = array();

        if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
            return $this->decode_string($str);
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            return $this->decode_container($str);
        }

        return null;
    }

    /**
     * Decode a quoted JSON string literal into UTF-8 text.
     *
     * @param string $str Literal including its surrounding quotes.
     * @return string Unescaped UTF-8 text.
     */
    private function decode_string($str)
    {
        $simple = array(
            '\b' => chr(0x08),
            '\t' => chr(0x09),
            '\n' => chr(0x0A),
            '\f' => chr(0x0C),
            '\r' => chr(0x0D),
        );

        $delim = substr($str, 0, 1);
        $chrs = (string) substr($str, 1, -1);
        $utf8 = '';
        $strlen_chrs = strlen($chrs);

        for ($c = 0; $c < $strlen_chrs; ++$c) {
            $pair = substr($chrs, $c, 2);
            $ord = ord($chrs[$c]);

            if (isset($simple[$pair])) {
                $utf8 .= $simple[$pair];
                ++$c;
            } elseif ($pair == '\\"' || $pair == '\\\'' || $pair == '\\\\' || $pair == '\\/') {
                // Drop the backslash, unless it escapes the quote character
                // that is not this literal's delimiter.
                if (($delim == '"' && $pair != '\\\'') ||
                    ($delim == "'" && $pair != '\\"')) {
                    $utf8 .= $chrs[++$c];
                }
            } elseif (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                // single, escaped unicode character
                $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2)))
                    . chr(hexdec(substr($chrs, ($c + 4), 2)));
                $utf8 .= $this->utf162utf8($utf16);
                $c += 5;
            } elseif (($ord >= 0x20) && ($ord <= 0x7F)) {
                $utf8 .= $chrs[$c];
            } else {
                // Multi-byte UTF-8 sequences are copied through unchanged.
                $len = self::utf8_sequence_length($ord);

                if ($len > 1) {
                    $utf8 .= substr($chrs, $c, $len);
                    $c += $len - 1;
                }
            }
        }

        return $utf8;
    }

    /**
     * Decode a JSON array ('[...]') or object ('{...}').
     *
     * Scans the content once with a small state stack to find the top-level
     * commas, then decodes each slice recursively.
     *
     * @param string $str Trimmed text starting with '[' or '{'.
     * @return mixed array for a JSON array; stdClass or associative array for an object.
     */
    private function decode_container($str)
    {
        $loose = (bool) ($this->use & SERVICES_JSON_LOOSE_TYPE);
        $is_list = ($str[0] == '[');

        if ($is_list) {
            $stk = array(SERVICES_JSON_IN_ARR);
            $arr = array();
        } else {
            $stk = array(SERVICES_JSON_IN_OBJ);
            $obj = $loose ? array() : new stdClass();
        }

        array_push($stk, array('what' => SERVICES_JSON_SLICE, 'where' => 0, 'delim' => false));

        $chrs = $this->reduce_string(substr($str, 1, -1));

        if ($chrs === '') {
            return $is_list ? $arr : $obj;
        }

        // States in which a nested array, object or comment may start.
        $nesting = array(SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ);
        $strlen_chrs = strlen($chrs);

        for ($c = 0; $c <= $strlen_chrs; ++$c) {
            $top = end($stk);
            $pair = substr($chrs, $c, 2);

            if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == SERVICES_JSON_SLICE))) {
                // found a comma that is not inside a string, array, etc.,
                // OR we've reached the end of the character list
                $slice = substr($chrs, $top['where'], ($c - $top['where']));
                array_push($stk, array('what' => SERVICES_JSON_SLICE, 'where' => ($c + 1), 'delim' => false));

                if ($is_list) {
                    // decode() trims the slice and returns numeric text as a
                    // string, so numbers need no special handling here and
                    // whitespace after a comma never leaks into the value.
                    array_push($arr, $this->decode($slice));
                    continue;
                }

                $parts = array();

                if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // "name":value pair
                    $key = $this->decode($parts[1]);
                    $val = $this->decode($parts[2]);
                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // name:value pair, where name is unquoted
                    $key = $parts[1];
                    $val = $this->decode($parts[2]);
                } else {
                    // Not a recognisable member: skipped.
                    continue;
                }

                if ($loose) {
                    $obj[$key] = $val;
                } else {
                    $obj->$key = $val;
                }

            } elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != SERVICES_JSON_IN_STR)) {
                // found a quote, and we are not inside a string
                array_push($stk, array('what' => SERVICES_JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]));

            } elseif (($chrs[$c] == $top['delim']) &&
                ($top['what'] == SERVICES_JSON_IN_STR) &&
                (($c - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
                // found the closing quote: it is not escaped because the
                // number of backslashes directly before it is not odd
                array_pop($stk);

            } elseif (($chrs[$c] == '[') && in_array($top['what'], $nesting, true)) {
                // found a left-bracket, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_ARR, 'where' => $c, 'delim' => false));

            } elseif (($chrs[$c] == ']') && ($top['what'] == SERVICES_JSON_IN_ARR)) {
                // found a right-bracket, and we're in an array
                array_pop($stk);

            } elseif (($chrs[$c] == '{') && in_array($top['what'], $nesting, true)) {
                // found a left-brace, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_OBJ, 'where' => $c, 'delim' => false));

            } elseif (($chrs[$c] == '}') && ($top['what'] == SERVICES_JSON_IN_OBJ)) {
                // found a right-brace, and we're in an object
                array_pop($stk);

            } elseif (($pair == '/*') && in_array($top['what'], $nesting, true)) {
                // found a comment start, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_CMT, 'where' => $c, 'delim' => false));
                $c++;

            } elseif (($pair == '*/') && ($top['what'] == SERVICES_JSON_IN_CMT)) {
                // found a comment end: blank the whole comment out with spaces
                array_pop($stk);
                $c++;

                for ($i = $top['where']; $i <= $c; ++$i) {
                    $chrs = substr_replace($chrs, ' ', $i, 1);
                }
            }
        }

        return $is_list ? $arr : $obj;
    }

    /**
     * Tell whether a value is a ServicesJSON_Error (or subclass) instance.
     *
     * Declared static because the class calls it statically; calling it on
     * an instance keeps working as well.
     *
     * @param mixed $data Value to test.
     * @param mixed $code Unused; kept for signature compatibility.
     * @return bool
     */
    public static function isError($data, $code = null)
    {
        return is_object($data) && ($data instanceof ServicesJSON_Error);
    }
}
/**
 * Error value returned by ServicesJSON::encode() for types it cannot encode.
 *
 * Uses __construct() rather than a method named after the class, because the
 * latter is deprecated in PHP 7 and no longer treated as a constructor in
 * PHP 8. The message and code are kept so callers can report the failure.
 */
class ServicesJSON_Error
{
    /**
     * Human-readable description of the failure.
     *
     * @var string
     */
    public $message;

    /**
     * Optional error code supplied by the creator.
     *
     * @var mixed
     */
    public $code;

    /**
     * @param string $message  Description of the failure.
     * @param mixed  $code     Optional error code.
     * @param mixed  $mode     Unused; accepted for signature compatibility.
     * @param mixed  $options  Unused; accepted for signature compatibility.
     * @param mixed  $userinfo Unused; accepted for signature compatibility.
     */
    public function __construct($message = 'unknown error', $code = null, $mode = null, $options = null, $userinfo = null)
    {
        $this->message = $message;
        $this->code = $code;
    }

    /**
     * @return string The message given at construction.
     */
    public function getMessage()
    {
        return $this->message;
    }

    /**
     * @return mixed The code given at construction.
     */
    public function getCode()
    {
        return $this->code;
    }
}

关于php - JSON_BIGINT_AS_STRING 向后移植,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15659325/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com