/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.IOUtils;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
 * DEFLATE algorithm is used, as the size information is not available
 * from the header.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files.</p>
 *
 * <p>As of Apache Commons Compress 1.3 it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and cache it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding.
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }
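
    /*
     * Illustrative usage sketch (hypothetical caller code, "legacy.zip" is a
     * placeholder): archives that were not written with UTF-8 file names can be
     * read by passing an explicit encoding such as the traditional "CP437":
     *
     *   InputStream fileIn = new FileInputStream("legacy.zip");
     *   ZipArchiveInputStream zin = new ZipArchiveInputStream(fileIn, "CP437", true);
     */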

    /**
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(InputStream inputStream,
                                 String encoding,
                                 boolean useUnicodeExtraFields,
                                 boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (EOFException e) {
            return null;
        }

        ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        int off = WORD;
        current = new CurrentEntry();

        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag
            ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
        readFully(lfh);
        ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(ZipLong size, ZipLong cSize) {
        Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC))) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);
        }
        return false;
    }

    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
        }

        return read;
    }
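
    /*
     * Illustrative read loop (hypothetical caller code; "zin" is this stream and
     * "out" any OutputStream).  Since getSize() may be -1 for DEFLATED entries read
     * from a stream, callers should read until read() returns -1 instead of relying
     * on the advertised size, and should check canReadEntryData() first:
     *
     *   ZipArchiveEntry entry;
     *   while ((entry = zin.getNextZipEntry()) != null) {
     *       if (!zin.canReadEntryData(entry)) {
     *           continue; // encrypted or unsupported compression method
     *       }
     *       byte[] buffer = new byte[4096];
     *       int n;
     *       while ((n = zin.read(buffer, 0, buffer.length)) != -1) {
     *           out.write(buffer, 0, n);
     *       }
     *   }
     */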

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(byte[] buffer, int offset, int length) throws IOException {

        if (current.hasDataDescriptor) {
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            buf.position(0);
            int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
        int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                        + " which is not supported by Commons"
                        + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            in.close();
            inf.end();
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if an I/O error occurs.
     * @throws IllegalArgumentException if value is negative.
     */
    @Override
    public long skip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }
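
    /*
     * Illustrative sketch (hypothetical caller code): a caller that wants to ignore
     * the rest of the current entry can skip to its end before asking for the next
     * one; getNextZipEntry() performs the equivalent clean-up internally via
     * closeEntry().
     *
     *   zin.skip(Long.MAX_VALUE); // discard the remainder of the current entry
     */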

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    private static boolean checksig(byte[] signature, byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            drainCurrentEntryData();
        } else {
            skip(Long.MAX_VALUE);

            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
            } else {
                count(n);
                remaining -= n;
            }
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>For Java versions prior to Java 7, the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs, i.e. they start over at 0 once 2^32 is reached.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    private void readFully(byte[] b) throws IOException {
        int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    private void readDataDescriptor() throws IOException {
        readFully(WORD_BUF);
        ZipLong val = new ZipLong(WORD_BUF);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(WORD_BUF);
            val = new ZipLong(WORD_BUF);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(TWO_DWORD_BUF);
        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(TWO_DWORD_BUF, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
     */
    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()
            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED;
    }
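
    /*
     * Illustrative sketch (hypothetical caller code, "rawIn" is any InputStream):
     * STORED entries that use a data descriptor can only be read when the stream is
     * created with allowStoredEntriesWithDataDescriptor set to true; otherwise
     * read() throws an UnsupportedZipFeatureException for such entries.  In that
     * mode the whole entry is buffered in memory, see readStoredEntry() below.
     *
     *   ZipArchiveInputStream zin =
     *       new ZipArchiveInputStream(rawIn, "UTF-8", true, true);
     */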

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     * <li>Reads a stored entry until the signature of a local file
     * header, central directory header or data descriptor has been
     * found.</li>
     * <li>Stores all entry data in lastStoredEntry.</li>
     * <li>Rewinds the stream to position at the data
     * descriptor.</li>
     * <li>Reads the data descriptor.</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }

        byte[] b = bos.toByteArray();
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * "data descriptor", "local file header" or
     * "central directory entry".
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     */
    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
        throws IOException {

        boolean done = false;
        int readTooMuch = 0;
        for (int i = 0; !done && i < lastRead - 4; i++) {
            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
                        || (buf.array()[i + 2] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
                    // found a LFH or CFH:
                    readTooMuch = offset + lastRead - i - expectedDDLen;
                    done = true;
                } else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
                    // found DD:
                    readTooMuch = offset + lastRead - i;
                    done = true;
                }
                if (done) {
                    // * push back bytes read in excess as well as the data
                    //   descriptor
                    // * copy the remaining bytes to cache
                    // * read data descriptor
                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
                    bos.write(buf.array(), 0, i);
                    readDataDescriptor();
                }
            }
        }
        return done;
    }

    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     */
    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen) {
        final int cacheable = offset + lastRead - expectedDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expectedDDLen + 3);
            offset = expectedDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

    private void pushback(byte[] buf, int offset, int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory.  One LFH has been read too much
        // already.  The calculation discounts file names and extra
        // data so it will be too short.
        realSkip(entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(SHORT_BUF);
        // file comment
        realSkip(ZipShort.getValue(SHORT_BUF));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                    || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates the bytes-read counter.
     */
    private void realSkip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates the bytes-read counter.
     */
    private int readOneByte() throws IOException {
        int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field?
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io.
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            pos++;
            count(1);
            current.bytesReadFromStream++;
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}