gpt4 book ai didi

java - Avro 模式演变 : Can't add or remove fields

转载 作者:塔克拉玛干 更新时间:2023-11-02 08:02:54 26 4
gpt4 key购买 nike

我目前正在尝试演进(evolve)我的 Avro 模式(schema),根据文档,这应该不是什么难事。但是,在添加或删除字段之后,Avro 无法反序列化这些字节。

我正在使用以下架构:

AvroSchemas.avsc:

[
{
"namespace": "stackoverflow.example.avro",
"type": "record",
"name": "Record_1_1",
"fields": [
{"name": "value0", "type": "string"}
]
},
{
"namespace": "stackoverflow.example.avro",
"type": "record",
"name": "Record_1_2",
"fields": [
{"name": "value0", "type": "string"},
{"name": "value1", "type": "string", "default": "Hello World"}
]
},
{
"namespace": "stackoverflow.example.avro",
"type": "record",
"name": "Record_2_1",
"fields": [
{"name": "someList", "type": {"type": "array", "items": "int"}}
]
},
{
"namespace": "stackoverflow.example.avro",
"type": "record",
"name": "Record_2_2",
"fields": [
{"name": "someBool", "type": "boolean", "default": "false"},
{"name": "someList", "type": {"type": "array", "items": "int"}}
]
}
]

这些类是使用以下 Maven 构建插件生成的:

  <plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<version>1.8.2</version>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>schema</goal>
</goals>
<configuration>
<sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
<outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
<stringType>String</stringType>
</configuration>
</execution>
</executions>
</plugin>

这是我用来测试模式演变的代码:

AvroTest.java:

package stackoverflow.example;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Objects;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import stackoverflow.example.avro.Record_1_1;
import stackoverflow.example.avro.Record_1_2;
import stackoverflow.example.avro.Record_2_1;
import stackoverflow.example.avro.Record_2_2;

/**
* Reproducer for Avro schema evolution with generated specific records: a record is
* serialized as one generated class and then deserialized as another generated class.
*/
public class AvroTest {

/**
* Runs the same-class round-trip test first, then the two cross-class tests.
*/
public static void main(String[] args) throws Exception {
executeTest0();
executeTest1();
executeTest2();
}

/**
* Sanity check that write and read work when the source and destination class are the same.
* Each of the four record classes is round-tripped and compared field by field; a null
* result or a mismatching field throws a RuntimeException.
*/
private static void executeTest0() {
Record_1_1 source1 = new Record_1_1("A");
Record_1_1 dest1 = trySerializeDeserialize(source1, Record_1_1.class, Record_1_1.class);
if (dest1 == null || !Objects.equals(source1.getValue0(), dest1.getValue0())) {
throw new RuntimeException("Record_1_1 Test 0 failed");
}

Record_1_2 source2 = new Record_1_2("A", "B");
Record_1_2 dest2 = trySerializeDeserialize(source2, Record_1_2.class, Record_1_2.class);
if (dest2 == null || !Objects.equals(source2.getValue0(), dest2.getValue0()) || !Objects.equals(source2.getValue1(), dest2.getValue1())) {
throw new RuntimeException("Record_1_2 Test 0 failed");
}

Record_2_1 source3 = new Record_2_1(new ArrayList<>());
Record_2_1 dest3 = trySerializeDeserialize(source3, Record_2_1.class, Record_2_1.class);
if (dest3 == null || !Objects.equals(source3.getSomeList(), dest3.getSomeList())) {
throw new RuntimeException("Record_2_1 Test 0 failed");
}

Record_2_2 source4 = new Record_2_2(true, new ArrayList<>());
Record_2_2 dest4 = trySerializeDeserialize(source4, Record_2_2.class, Record_2_2.class);
if (dest4 == null || !Objects.equals(source4.getSomeBool(), dest4.getSomeBool()) || !Objects.equals(source4.getSomeList(), dest4.getSomeList())) {
throw new RuntimeException("Record_2_2 Test 0 failed");
}
}

/**
* Cross-class test for the string records: writes a Record_1_1 and reads it as Record_1_2,
* then writes a Record_1_2 and reads it as Record_1_1. Unlike executeTest0, a failure is
* only printed to standard output, not thrown.
*/
private static void executeTest1() {
Record_1_1 source1 = new Record_1_1("Test");
Record_1_2 dest1 = trySerializeDeserialize(source1, Record_1_1.class, Record_1_2.class);
// Only value1 is checked here; it is compared against the literal "Hello World".
if (dest1 == null || !Objects.equals(dest1.getValue1(), "Hello World")) {
System.out.println("adding field with default value failed: " + dest1);
}

Record_1_2 source2 = new Record_1_2("Test0", "Test1");
Record_1_1 dest2 = trySerializeDeserialize(source2, Record_1_2.class, Record_1_1.class);
if (dest2 == null || !Objects.equals(source2.getValue0(), dest2.getValue0())) {
System.out.println("removing field failed: " + dest2);
}
}

/**
* Cross-class test for the list records: writes a Record_2_1 and reads it as Record_2_2,
* then writes a Record_2_2 and reads it as Record_2_1. Only someList is compared in both
* directions; someBool is not checked. Failures are printed, not thrown.
*/
private static void executeTest2() {
Record_2_1 source1 = new Record_2_1(new ArrayList<>());
Record_2_2 dest1 = trySerializeDeserialize(source1, Record_2_1.class, Record_2_2.class);
if (dest1 == null || !Objects.equals(source1.getSomeList(), dest1.getSomeList())) {
System.out.println("adding boolean field with default value failed: " + dest1);
}

Record_2_2 source2 = new Record_2_2(true, new ArrayList<>());
Record_2_1 dest2 = trySerializeDeserialize(source2, Record_2_2.class, Record_2_1.class);
if (dest2 == null || !Objects.equals(source2.getSomeList(), dest2.getSomeList())) {
System.out.println("removing boolean field failed: " + dest2);
}
}

/**
* Serializes source as sourceClass and deserializes the resulting bytes as destClass.
*
* @param source the object to serialize
* @param sourceClass the class used to build the writer
* @param destClass the class used to build the reader
* @return the deserialized object, or null if writing or reading threw any exception
*/
private static <T, E> E trySerializeDeserialize(T source, Class<T> sourceClass, Class<E> destClass) {
E result;

try {
byte[] bytes = write(source, sourceClass);
result = read(bytes, destClass);
} catch (Exception e) {
// The exception is discarded: callers only ever see null and cannot tell what went wrong.
result = null;
}

return result;
}

/**
* Encodes value into a byte array using a binary encoder and a SpecificDatumWriter
* built from clazz.
*
* @param value the object to encode
* @param clazz the class handed to the SpecificDatumWriter
* @return the bytes written to the in-memory stream
* @throws Exception if writing, flushing or closing the stream fails
*/
private static <T> byte[] write(T value, Class<T> clazz) throws Exception {
byte[] bytes;

try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
Encoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
DatumWriter<T> writer = new SpecificDatumWriter<>(clazz);
writer.write(value, encoder);

// Flush the encoder before copying the bytes out of the stream.
encoder.flush();
bytes = bos.toByteArray();
}

return bytes;
}

/**
* Decodes bytes with a binary decoder and a SpecificDatumReader built from clazz.
* Only the destination class is handed to the reader; the class the bytes were
* written with is not passed to this method.
*
* @param bytes the serialized data
* @param clazz the class handed to the SpecificDatumReader
* @return the object produced by the reader
* @throws Exception if decoding fails
*/
private static <T> T read(byte[] bytes, Class<T> clazz) throws Exception {
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
DatumReader<T> reader = new SpecificDatumReader<>(clazz);

return reader.read(null, decoder);
}
}

输出:

adding field with default value failed: null
adding boolean field with default value failed: null
removing boolean field failed: null

根据文档,我的所有测试都应该能通过(添加一个带默认值的字段,或者在接收端删除一个字段)。我不认为这些文档只是写着玩的,所以我是不是漏掉了某些设置?

最佳答案

问题在于您反序列化数据的方式。使用 SpecificDatumReader(Class<T>) 构造函数时,reader 会假定 writer 的模式与 reader 的模式相同。

您可以改用 SpecificDatumReader(Schema writer, Schema reader) 构造函数来解决此问题。例如:

/**
 * Decodes {@code bytes} using both the schema they were written with and the schema
 * they should be read as.
 *
 * @param bytes the serialized data
 * @param sourceClass the generated class whose schema is passed as the writer schema
 * @param destClass the generated class whose schema is passed as the reader schema
 * @return the object produced by the reader
 * @throws Exception if decoding fails
 */
private static <T, E> E read(byte[] bytes, Class<T> sourceClass, Class<E> destClass) throws Exception {
    final Decoder input = DecoderFactory.get().binaryDecoder(bytes, null);
    final SpecificData model = SpecificData.get();
    // Writer schema first, reader schema second, matching the constructor's parameter order.
    final DatumReader<E> resolvingReader =
            new SpecificDatumReader<>(model.getSchema(sourceClass), model.getSchema(destClass));
    return resolvingReader.read(null, input);
}

注意:DatumWriter 的输出并不是 Avro 文件(Avro 文件总会在其 header 中包含序列化数据时所用的模式),而只是一个不带 header 的序列化对象。如果你想测试 Avro 文件,应该使用 DataFileWriter 和 DataFileReader。


您的所有模式更改都是兼容的,按照 Avro format specification 应该都能正常工作。模式中唯一的错误是 someBool 的默认值:它应该是 boolean 值 (false),而不是字符串 ("false")。

关于java - Avro 模式演变 : Can't add or remove fields,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45917760/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com