metadata:

iceberg 元数据组织方式_数据湖

// org.apache.iceberg.hadoop.HadoopTableOperations#metadataRoot
/**
 * Builds the path of the metadata directory.
 *
 * @return the {@code metadata} entry located directly under {@code location}
 */
private Path metadataRoot() {
  final String metadataDirName = "metadata";
  return new Path(location, metadataDirName);
}

metadata.json:

// org.apache.iceberg.hadoop.HadoopTableOperations#metadataFilePath
/**
 * Builds the path of a versioned metadata file.
 *
 * <p>The file name is {@code "v" + metadataVersion} followed by the extension that
 * {@link TableMetadataParser#getFileExtension} reports for the given codec.
 *
 * @param metadataVersion version number embedded in the file name
 * @param codec codec whose file extension is appended to the name
 * @return the result of {@code metadataPath} for the assembled file name
 */
private Path metadataFilePath(int metadataVersion, TableMetadataParser.Codec codec) {
  final String versionPrefix = "v" + metadataVersion;
  final String fileName = versionPrefix + TableMetadataParser.getFileExtension(codec);
  return metadataPath(fileName);
}
// org.apache.iceberg.TableMetadataParser#getFileExtension(org.apache.iceberg.TableMetadataParser.Codec)
/**
 * Returns the file extension used for metadata files written with the given codec.
 *
 * @param codec codec whose {@code extension} is placed in front of the fixed suffix
 * @return {@code codec.extension} followed by {@code ".metadata.json"}
 */
public static String getFileExtension(Codec codec) {
  final String metadataSuffix = ".metadata.json";
  return codec.extension + metadataSuffix;
}

snapshot: 时间

// org.apache.iceberg.SnapshotProducer#manifestListPath
/**
 * Creates the output file for a manifest list.
 *
 * <p>The file name has the form {@code snap-<snapshotId>-<attempt>-<commitUUID>} with the
 * Avro extension added by {@code FileFormat.AVRO.addExtension}. The name is resolved through
 * {@code ops.metadataFileLocation} and opened through {@code ops.io().newOutputFile}.
 *
 * @return a new output file for the manifest list
 */
protected OutputFile manifestListPath() {
// attempt.incrementAndGet() advances the counter on every call, so each call produces a
// different attempt number in the file name (presumably to keep retries from colliding).
return ops.io().newOutputFile(ops.metadataFileLocation(FileFormat.AVRO.addExtension(
String.format("snap-%d-%d-%s", snapshotId(), attempt.incrementAndGet(), commitUUID))));
}

manifest: 提交

// org.apache.iceberg.SnapshotProducer#newManifestOutput
/**
 * Creates the output file for a new manifest.
 *
 * <p>The file name has the form {@code <commitUUID>-m<n>} with the Avro extension added by
 * {@code FileFormat.AVRO.addExtension}, where {@code n} is the value of {@code manifestCount}
 * before it is incremented by this call.
 *
 * @return a new output file for the manifest
 */
protected OutputFile newManifestOutput() {
return ops.io().newOutputFile(
// getAndIncrement() returns the current count and then bumps it, so successive calls
// yield distinct manifest file names for the same commitUUID.
ops.metadataFileLocation(FileFormat.AVRO.addExtension(commitUUID + "-m" + manifestCount.getAndIncrement())));
}

data:

iceberg 元数据组织方式_hadoop_02

// org.apache.iceberg.LocationProviders.DefaultLocationProvider#DefaultLocationProvider
// Resolve the data directory: look up TableProperties.WRITE_NEW_DATA_LOCATION in properties
// and fall back to "<tableLocation>/data" when getOrDefault supplies the default. The chosen
// value is passed through stripTrailingSlash before being stored.
this.dataLocation = stripTrailingSlash(properties.getOrDefault(
TableProperties.WRITE_NEW_DATA_LOCATION,
String.format("%s/data", tableLocation)));

partition:

// org.apache.iceberg.LocationProviders.DefaultLocationProvider#newDataLocation(org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike, java.lang.String)
/**
 * Builds the location of a new data file inside a partition.
 *
 * <p>The result is {@code dataLocation}, the partition path produced by
 * {@code spec.partitionToPath(partitionData)}, and {@code filename}, joined with {@code "/"}.
 *
 * @param spec partition spec used to turn the partition data into a path
 * @param partitionData partition values of the file being written
 * @param filename name of the data file
 * @return {@code <dataLocation>/<partitionPath>/<filename>}
 */
public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) {
  return dataLocation + "/" + spec.partitionToPath(partitionData) + "/" + filename;
}