有选择地从 S3 中的 zip 文件中提取条目，而无需下载整个文件

我使用 zip4j 通过以下代码也实现了该功能。但是我仍然不明白其中这个算式的含义： long endFile = 30 + offset + header.getFileNameLength() + compressedSize - 1; 其中的 30 是从哪里来的？我如何确保该算式涵盖了所有情况下所需的全部变量？public static void main(String[] args) throws Exception {        S3Client s3Client = S3Client.builder()                .credentialsProvider(StaticCredentialsProvider                        .create(AwsSessionCredentials.create(ACCESS_KEY, SECRET_KEY, SESSION_TOKEN)))                .region(Region.US_WEST_2)                .build();        HeadObjectResponse headObject = s3Client.headObject(HeadObjectRequest.builder()                .bucket(BUCKET)                .key(OBJECT_PATH)                .build());        long zipSize = headObject.contentLength();        // fetch the last 22 bytes (end-of-central-directory record; assuming the comment field is empty)        long eocdStart = zipSize - 22;        final var eocdStream = s3Client.getObject(GetObjectRequest.builder()                .bucket(BUCKET)                .key(OBJECT_PATH)                .range("bytes=%d-%d".formatted(eocdStart, zipSize))                .build());        System.out.println("eocd start: " + eocdStart);        byte[] eocd = IOUtils.toByteArray(eocdStream);        // get the start offset and size of the central directory        int cdSize = byteArrayToLeInt(Arrays.copyOfRange(eocd, 12, 16));        int cdStart = byteArrayToLeInt(Arrays.copyOfRange(eocd, 16, 20));        System.out.println("cdStart: " + cdStart);        System.out.println("cdSize: " + cdSize);        // get the full central directory        final var cdStream = s3Client.getObject(GetObjectRequest.builder()                .bucket(BUCKET)                .key(OBJECT_PATH)                .range("bytes=%d-%d".formatted(cdStart, cdStart + cdSize - 1))                .build());        byte[] cd = IOUtils.toByteArray(cdStream);        // write the full dir + eocd:        ByteArrayOutputStream out = new ByteArrayOutputStream();        // write cd        out.write(cd);        // write eocd, 
resetting the cd start to 0 since that is        // where it will appear in our new temp file        byte[] b = leIntToByteArray(0);        eocd[16] = b[0];        eocd[17] = b[1];        eocd[18] = b[2];        eocd[19] = b[3];        out.write(eocd);        out.flush();        byte[] cdbytes = out.toByteArray();        System.out.println(cdbytes.length);        File tempFile = Files.createTempFile("temp", "zip").toFile();        FileOutputStream output = new FileOutputStream(tempFile);        output.write(cdbytes);        output.flush();        output.close();        getZipFile1(s3Client, tempFile, "a2ed09e5-dfdb-4a66-95f5-8bb62bc8fafd-2023-05-23T10_07_19Z.warc.gz");        getZipFile1(s3Client, tempFile, "index.cdx.gz");        getZipFile1(s3Client, tempFile, "index.cdx");        getZipFile1(s3Client, tempFile, "extraPages.jsonl");        getZipFile1(s3Client, tempFile, "pages.jsonl");        getZipFile1(s3Client, tempFile, "datapackage.json");        getZipFile1(s3Client, tempFile, "datapackage-digest.json");}private static void getZipFile1(S3Client s3Client, File tempFile, String file) throws Exception {        ZipFile zipFile = new ZipFile(tempFile);        for (var header : zipFile.getFileHeaders()) {            if (!header.isDirectory()) {                if (header.getFileName().contains(file)) {                    System.out.println(header);                    long offset = header.getOffsetLocalHeader(); // 41489906                    int compressedSize = (int) header.getCompressedSize(); // 171                    long endFile = 30 + offset + header.getFileNameLength() + compressedSize - 1;                    byte[] fileBytes = IOUtils.toByteArray(s3Client.getObject(GetObjectRequest.builder()                            .bucket(BUCKET)                            .key(OBJECT_PATH)                            .range("bytes=%d-%d".formatted(offset, endFile))                            .build()));                    ZipInputStream zipInputStream = new 
ZipInputStream(new ByteArrayInputStream(fileBytes));                    zipInputStream.getNextEntry(header, true);                    File outputFile = new File("/home/joao/Downloads/folder/" + header.getFileName());                    Files.deleteIfExists(outputFile.toPath());                    FileUtils.copyInputStreamToFile(zipInputStream, outputFile);                }            }        }}

有选择地从 S3 中的 zip 文件中提取条目，而无需下载整个文件

2 个回答