gpt4 book ai didi

java - 使用 GSON JsonReader 处理大字段的最佳方法

转载 作者:行者123 更新时间:2023-11-30 06:52:04 25 4
gpt4 key购买 nike

即使使用了 GSON 的流式 API(Streaming),我仍然收到 java.lang.OutOfMemoryError: Java heap space。

{"result":"OK","base64":"JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC...."}

base64 字段最大可达 200 MB。GSON 占用的内存比这多得多(3 GB)。当我尝试将 base64 存储到一个变量中时,我得到:

Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:2367)
at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:130)
at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:114)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:535)
at java.lang.StringBuilder.append(StringBuilder.java:204)
at com.google.gson.stream.JsonReader.nextQuotedValue(JsonReader.java:1014)
at com.google.gson.stream.JsonReader.nextString(JsonReader.java:815)

处理这类字段的最佳方法是什么?

最佳答案

出现 OutOfMemoryError 的原因是:GSON 的 nextString() 返回的是一个完整的字符串,而这个字符串是通过 StringBuilder 逐步拼接出来的;拼接一个非常大的字符串时就会耗尽堆内存。遇到这种问题时,您别无选择,只能分段处理中间数据。不幸的是,GSON 没有提供任何方式来分段处理这种超大的字符串字面量。

不确定您是否可以更改响应负载;如果不能,您可能需要实现自己的 JSON 读取器,或者“破解”现有的 JsonReader,使其以流式方式工作。下面的示例基于 GSON 2.5 并大量使用反射,因为 JsonReader 非常小心地隐藏了其内部状态。

EnhancedGson25JsonReader.java

final class EnhancedGson25JsonReader
extends JsonReader {

// A listener to accept the internal character buffers.
// Accepting a single string built on such buffers is total memory waste as well.
interface ISlicedStringListener {

void accept(char[] buffer, int start, int length)
throws IOException;

}

// The constants can be just copied

/** @see JsonReader#PEEKED_NONE */
private static final int PEEKED_NONE = 0;

/** @see JsonReader#PEEKED_SINGLE_QUOTED */
private static final int PEEKED_SINGLE_QUOTED = 8;

/** @see JsonReader#PEEKED_DOUBLE_QUOTED */
private static final int PEEKED_DOUBLE_QUOTED = 9;

// Here is a bunch of spies made to "spy" for the parent's class state

private final FieldSpy<Integer> peeked;
private final MethodSpy<Integer> doPeek;
private final MethodSpy<Integer> getLineNumber;
private final MethodSpy<Integer> getColumnNumber;
private final FieldSpy<char[]> buffer;
private final FieldSpy<Integer> pos;
private final FieldSpy<Integer> limit;
private final MethodSpy<Character> readEscapeCharacter;
private final FieldSpy<Integer> lineNumber;
private final FieldSpy<Integer> lineStart;
private final MethodSpy<Boolean> fillBuffer;
private final MethodSpy<IOException> syntaxError;
private final FieldSpy<Integer> stackSize;
private final FieldSpy<int[]> pathIndices;

private EnhancedJsonReader(final Reader reader)
throws NoSuchFieldException, NoSuchMethodException {
super(reader);
peeked = spyField(JsonReader.class, this, "peeked");
doPeek = spyMethod(JsonReader.class, this, "doPeek");
getLineNumber = spyMethod(JsonReader.class, this, "getLineNumber");
getColumnNumber = spyMethod(JsonReader.class, this, "getColumnNumber");
buffer = spyField(JsonReader.class, this, "buffer");
pos = spyField(JsonReader.class, this, "pos");
limit = spyField(JsonReader.class, this, "limit");
readEscapeCharacter = spyMethod(JsonReader.class, this, "readEscapeCharacter");
lineNumber = spyField(JsonReader.class, this, "lineNumber");
lineStart = spyField(JsonReader.class, this, "lineStart");
fillBuffer = spyMethod(JsonReader.class, this, "fillBuffer", int.class);
syntaxError = spyMethod(JsonReader.class, this, "syntaxError", String.class);
stackSize = spyField(JsonReader.class, this, "stackSize");
pathIndices = spyField(JsonReader.class, this, "pathIndices");
}

static EnhancedJsonReader getEnhancedGson25JsonReader(final Reader reader) {
try {
return new EnhancedJsonReader(reader);
} catch ( final NoSuchFieldException | NoSuchMethodException ex ) {
throw new RuntimeException(ex);
}
}

// This method has been copied and reworked from the nextString() implementation

void nextSlicedString(final ISlicedStringListener listener)
throws IOException {
int p = peeked.get();
if ( p == PEEKED_NONE ) {
p = doPeek.get();
}
switch ( p ) {
case PEEKED_SINGLE_QUOTED:
nextQuotedSlicedValue('\'', listener);
break;
case PEEKED_DOUBLE_QUOTED:
nextQuotedSlicedValue('"', listener);
break;
default:
throw new IllegalStateException("Expected a string but was " + peek()
+ " at line " + getLineNumber.get()
+ " column " + getColumnNumber.get()
+ " path " + getPath()
);
}
peeked.accept(PEEKED_NONE);
pathIndices.get()[stackSize.get() - 1]++;
}

// The following method is also a copy-paste that was patched for the "spies".
// It's, in principle, the same as the source one, but it has one more buffer singleCharBuffer
// in order not to add another method to the ISlicedStringListener interface (enjoy lamdbas as much as possible).
// Note that the main difference between these two methods is that this one
// does not aggregate a single string value, but just delegates the internal
// buffers to call-sites, so the latter ones might do anything with the buffers.

/**
* @see JsonReader#nextQuotedValue(char)
*/
private void nextQuotedSlicedValue(final char quote, final ISlicedStringListener listener)
throws IOException {
final char[] buffer = this.buffer.get();
final char[] singleCharBuffer = new char[1];
while ( true ) {
int p = pos.get();
int l = limit.get();
int start = p;
while ( p < l ) {
final int c = buffer[p++];
if ( c == quote ) {
pos.accept(p);
listener.accept(buffer, start, p - start - 1);
return;
} else if ( c == '\\' ) {
pos.accept(p);
listener.accept(buffer, start, p - start - 1);
singleCharBuffer[0] = readEscapeCharacter.get();
listener.accept(singleCharBuffer, 0, 1);
p = pos.get();
l = limit.get();
start = p;
} else if ( c == '\n' ) {
lineNumber.accept(lineNumber.get() + 1);
lineStart.accept(p);
}
}
listener.accept(buffer, start, p - start);
pos.accept(p);
if ( !fillBuffer.apply(just1) ) {
throw syntaxError.apply(justUnterminatedString);
}
}
}

// Save some memory

private static final Object[] just1 = { 1 };
private static final Object[] justUnterminatedString = { "Unterminated string" };

}

FieldSpy.java

final class FieldSpy<T>
implements Supplier<T>, Consumer<T> {

private final Object instance;
private final Field field;

private FieldSpy(final Object instance, final Field field) {
this.instance = instance;
this.field = field;
}

static <T> FieldSpy<T> spyField(final Class<?> declaringClass, final Object instance, final String fieldName)
throws NoSuchFieldException {
final Field field = declaringClass.getDeclaredField(fieldName);
field.setAccessible(true);
return new FieldSpy<>(instance, field);
}

@Override
public T get() {
try {
@SuppressWarnings("unchecked")
final T value = (T) field.get(instance);
return value;
} catch ( final IllegalAccessException ex ) {
throw new RuntimeException(ex);
}
}

@Override
public void accept(final T value) {
try {
field.set(instance, value);
} catch ( final IllegalAccessException ex ) {
throw new RuntimeException(ex);
}
}

}

MethodSpy.java

final class MethodSpy<T>
implements Function<Object[], T>, Supplier<T> {

private static final Object[] emptyObjectArray = {};

private final Object instance;
private final Method method;

private MethodSpy(final Object instance, final Method method) {
this.instance = instance;
this.method = method;
}

static <T> MethodSpy<T> spyMethod(final Class<?> declaringClass, final Object instance, final String methodName, final Class<?>... parameterTypes)
throws NoSuchMethodException {
final Method method = declaringClass.getDeclaredMethod(methodName, parameterTypes);
method.setAccessible(true);
return new MethodSpy<>(instance, method);
}

@Override
public T get() {
// my javac generates useless new Object[0] if no args passed
return apply(emptyObjectArray);
}

@Override
public T apply(final Object[] arguments) {
try {
@SuppressWarnings("unchecked")
final T value = (T) method.invoke(instance, arguments);
return value;
} catch ( final IllegalAccessException | InvocationTargetException ex ) {
throw new RuntimeException(ex);
}
}

}

HugeJsonReaderDemo.java

这是一个使用该方法读取巨大 JSON 并将其字符串值重定向到另一个文件的演示。

/**
 * Demo: reads {@code ./huge.json} and writes every string value it encounters to
 * {@code ./huge.json.STRINGS}, streaming each value in slices so that no value is
 * ever materialized as a single {@code String}.
 *
 * <p>Only {@code BEGIN_OBJECT}, {@code NAME} and {@code STRING} tokens are handled; any
 * other token seen while {@code hasNext()} is true raises an {@link AssertionError}.
 * Both files are processed as UTF-8 rather than the platform default charset.
 *
 * @throws IOException if either file cannot be opened, read or written
 */
public static void main(final String... args)
        throws IOException {
    try ( final EnhancedGson25JsonReader input = getEnhancedGson25JsonReader(
                  new InputStreamReader(new FileInputStream("./huge.json"), java.nio.charset.StandardCharsets.UTF_8));
          final Writer output = new OutputStreamWriter(
                  new BufferedOutputStream(new FileOutputStream("./huge.json.STRINGS")), java.nio.charset.StandardCharsets.UTF_8) ) {
        // The loop ends once hasNext() reports false; the closing END_OBJECT is never consumed.
        while ( input.hasNext() ) {
            final JsonToken token = input.peek();
            switch ( token ) {
            case BEGIN_OBJECT:
                input.beginObject();
                break;
            case NAME:
                // Property names are skipped; only values are exported
                input.nextName();
                break;
            case STRING:
                // Writer.write(char[], int, int) matches the listener signature
                input.nextSlicedString(output::write);
                break;
            default:
                throw new AssertionError(token);
            }
        }
    }
}

我成功地将上面的字段提取到一个文件中。输入文件的长度为 544 MB(570 425 371 字节),由以下 JSON block 生成:

  • {"result":"OK","base64":"
  • JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC × 16777216 (2^24)
  • "}

结果是(因为我只是将所有字符串重定向到文件):

  • OK
  • JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC × 16777216 (2^24)

我认为您遇到了一个非常有趣的问题。如果能得到 GSON 团队关于可能的 API 增强的一些反馈,那就更好了。

关于java - 使用 GSON JsonReader 处理大字段的最佳方法,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39615673/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com