慕的地10843
我还可以使用 zip4j 通过以下代码使其工作。但是我仍然不明白通过等式的解码部分: long endFile = 30 + offset + header.getFileNameLength() + compressedSize - 1;。30从哪里来?我如何确保该方程包含所有用例的所有必要变量?public static void main(String[] args) throws Exception { S3Client s3Client = S3Client.builder() .credentialsProvider(StaticCredentialsProvider .create(AwsSessionCredentials.create(ACCESS_KEY, SECRET_KEY, SESSION_TOKEN))) .region(Region.US_WEST_2) .build(); HeadObjectResponse headObject = s3Client.headObject(HeadObjectRequest.builder() .bucket(BUCKET) .key(OBJECT_PATH) .build()); long zipSize = headObject.contentLength(); // fetch the last 22 bytes (end-of-central-directory record; assuming the comment field is empty) long eocdStart = zipSize - 22; final var eocdStream = s3Client.getObject(GetObjectRequest.builder() .bucket(BUCKET) .key(OBJECT_PATH) .range("bytes=%d-%d".formatted(eocdStart, zipSize)) .build()); System.out.println("eocd start: " + eocdStart); byte[] eocd = IOUtils.toByteArray(eocdStream); // get the start offset and size of the central directory int cdSize = byteArrayToLeInt(Arrays.copyOfRange(eocd, 12, 16)); int cdStart = byteArrayToLeInt(Arrays.copyOfRange(eocd, 16, 20)); System.out.println("cdStart: " + cdStart); System.out.println("cdSize: " + cdSize); // get the full central directory final var cdStream = s3Client.getObject(GetObjectRequest.builder() .bucket(BUCKET) .key(OBJECT_PATH) .range("bytes=%d-%d".formatted(cdStart, cdStart + cdSize - 1)) .build()); byte[] cd = IOUtils.toByteArray(cdStream); // write the full dir + eocd: ByteArrayOutputStream out = new ByteArrayOutputStream(); // write cd out.write(cd); // write eocd, resetting the cd start to 0 since that is // where it will appear in our new temp file byte[] b = leIntToByteArray(0); eocd[16] = b[0]; eocd[17] = b[1]; eocd[18] = b[2]; eocd[19] = b[3]; out.write(eocd); out.flush(); byte[] cdbytes = out.toByteArray(); System.out.println(cdbytes.length); File tempFile = Files.createTempFile("temp", "zip").toFile(); FileOutputStream output = new FileOutputStream(tempFile); output.write(cdbytes); output.flush(); output.close(); getZipFile1(s3Client, tempFile, "a2ed09e5-dfdb-4a66-95f5-8bb62bc8fafd-2023-05-23T10_07_19Z.warc.gz"); getZipFile1(s3Client, tempFile, "index.cdx.gz"); getZipFile1(s3Client, tempFile, "index.cdx"); getZipFile1(s3Client, tempFile, "extraPages.jsonl"); getZipFile1(s3Client, tempFile, "pages.jsonl"); getZipFile1(s3Client, tempFile, "datapackage.json"); getZipFile1(s3Client, tempFile, "datapackage-digest.json");}private static void getZipFile1(S3Client s3Client, File tempFile, String file) throws Exception { ZipFile zipFile = new ZipFile(tempFile); for (var header : zipFile.getFileHeaders()) { if (!header.isDirectory()) { if (header.getFileName().contains(file)) { System.out.println(header); long offset = header.getOffsetLocalHeader(); // 41489906 int compressedSize = (int) header.getCompressedSize(); // 171 long endFile = 30 + offset + header.getFileNameLength() + compressedSize - 1; byte[] fileBytes = IOUtils.toByteArray(s3Client.getObject(GetObjectRequest.builder() .bucket(BUCKET) .key(OBJECT_PATH) .range("bytes=%d-%d".formatted(offset, endFile)) .build())); ZipInputStream zipInputStream = new ZipInputStream(new ByteArrayInputStream(fileBytes)); zipInputStream.getNextEntry(header, true); File outputFile = new File("/home/joao/Downloads/folder/" + header.getFileName()); Files.deleteIfExists(outputFile.toPath()); FileUtils.copyInputStreamToFile(zipInputStream, outputFile); } } }}