Avro文件序列化与反序列化学习
前提:新建maven工程
1、src/main下面新建avro文件夹
项目目录如下图:
2、pom.xml添加依赖包和编译插件
编译包配置:
<dependencies>
  <!-- JUnit 4 is required: the test classes import org.junit.Test,
       an annotation that does not exist in JUnit 3.8.1. -->
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
    <scope>test</scope>
  </dependency>
  <!-- Avro core library (DataFileReader/DataFileWriter, SpecificDatumReader/Writer). -->
  <dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro</artifactId>
    <version>1.8.2</version>
  </dependency>
  <!-- Avro command line tools. -->
  <dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-tools</artifactId>
    <version>1.8.2</version>
  </dependency>
  <!-- NOTE(review): the plugin is already configured in the build section;
       confirm whether it is also needed as a regular dependency. -->
  <dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-maven-plugin</artifactId>
    <version>1.8.2</version>
  </dependency>
  <!-- Avro schema compiler. -->
  <dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-compiler</artifactId>
    <version>1.8.2</version>
  </dependency>
</dependencies>
插件的配置:
<build>
  <!-- pluginManagement only declares plugin versions and configuration;
       a plugin listed here is NOT executed unless it is also referenced
       under build/plugins (or bound by the packaging's default lifecycle). -->
  <pluginManagement>
    <plugins>
      <plugin>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
        <version>1.8.2</version>
        <executions>
          <execution>
            <!-- Generate Java classes from the .avsc schemas before compilation. -->
            <phase>generate-sources</phase>
            <goals>
              <goal>schema</goal>
            </goals>
            <configuration>
              <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
              <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </pluginManagement>
  <!-- Reference the Avro plugin here so that the execution configured in
       pluginManagement actually runs during generate-sources. -->
  <plugins>
    <plugin>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-maven-plugin</artifactId>
    </plugin>
  </plugins>
</build>
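ps:在Eclipse中这里的<execution>标签会报错,把插件放到<pluginManagement>标签里错误就消失了。
但要注意:<pluginManagement>只是声明插件的版本和配置,并不会真正执行插件;
如果希望构建时自动从schema生成代码,还需要在<build>下的<plugins>中引用该插件。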
3、定义schema
使用JSON为Avro定义schema。schema由基本类型(null,boolean, int, long, float, double, bytes 和string)和复杂类型(record, enum, array, map, union, 和fixed)组成
在src/main/avro文件夹下新建AvPAO.avsc文件,内容如下:
{"type":"record","name":"AvPAO","namespace":"avro.avro","fields":[{"name":"flag","type":"int"},{"name":"dateTime","type":"string"},{"name":"rowkey","type":"string"},{"name":"org","type":"string"},{"name":"tvl","type":"string"},{"name":"response","type":"string"}]}
namespace —— 生成的java文件的包
type —— record
name —— 生成类的名字
fields —— 字段名和类型
4、生成AvPAO.java类
使用指令 java -jar avro-tools-1.8.2.jar compile schema AvPAO.avsc java 生成(最后一个参数 java 是生成代码的输出目录)
Ps:使用eclipse一直没生成成功,手动生成了AvPAO.java类然后拷贝到avro.avro包下
5、avro文件反序列化,代码如下:
package avro.avro;
import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;
/**
 * Reads an existing Avro container file and prints every {@code AvPAO}
 * record stored in it.
 */
public class TestAvPAO {

    /**
     * Deserializes all records from the Avro file on disk and prints each
     * one to standard output.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void testCreateUserClass() throws IOException {
        DatumReader<AvPAO> datumReader = new SpecificDatumReader<AvPAO>(AvPAO.class);
        File avroFile = new File("C:/Users/duyi/Desktop/avro91474834.avro");

        // try-with-resources closes the reader (and its file handle) even
        // when reading fails part-way through.
        try (DataFileReader<AvPAO> dataFileReader =
                new DataFileReader<AvPAO>(avroFile, datumReader)) {
            AvPAO av = null;
            while (dataFileReader.hasNext()) {
                // Hand the previous record back to next() so it can be
                // reused instead of allocating a new object per item.
                av = dataFileReader.next(av);
                System.out.println(av);
            }
        }
    }
}
运行结果如下:
6、avro文件序列化,代码如下:
package avro.avro;
import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;
/**
 * Writes a single {@code AvPAO} record to an Avro container file and then
 * reads the file back, printing every record found.
 */
public class TestAvPAO {

    /**
     * Serializes one populated {@code AvPAO} record to {@code AvPAO.avro}
     * and deserializes the file again, printing each record to standard
     * output.
     *
     * @throws IOException if the file cannot be written or read
     */
    @Test
    public void testCreateUserClass() throws IOException {
        AvPAO pao = new AvPAO();
        pao.setDateTime("324234");
        pao.setFlag(1);
        pao.setOrg("23423");
        pao.setResponse("234324");
        pao.setRowkey("23423");
        pao.setTvl("234234");

        File avroFile = new File("AvPAO.avro");

        // Serialize the record to disk. try-with-resources guarantees the
        // writer is closed even if create() or append() throws.
        DatumWriter<AvPAO> datumWriter = new SpecificDatumWriter<AvPAO>(AvPAO.class);
        try (DataFileWriter<AvPAO> dataFileWriter = new DataFileWriter<AvPAO>(datumWriter)) {
            dataFileWriter.create(pao.getSchema(), avroFile);
            dataFileWriter.append(pao);
        }

        // Deserialize the records from disk; the reader is closed
        // automatically so the file handle is not leaked.
        DatumReader<AvPAO> datumReader = new SpecificDatumReader<AvPAO>(AvPAO.class);
        try (DataFileReader<AvPAO> dataFileReader =
                new DataFileReader<AvPAO>(avroFile, datumReader)) {
            AvPAO user = null;
            while (dataFileReader.hasNext()) {
                // Hand the previous record back to next() so it can be
                // reused instead of allocating a new object per item.
                user = dataFileReader.next(user);
                System.out.println(user);
            }
        }
    }
}
运行结果如下图:
总结:还行吧,不是很难。