001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025
026import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
027import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
028import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
029import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
030import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
031import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
032import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
033import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
034import org.apache.commons.compress.archivers.sevenz.SevenZFile;
035import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
036import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
037import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
038import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
039import org.apache.commons.compress.utils.IOUtils;
040
041/**
042 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
043 * the InputStream. In order to add other implementations, you should extend
044 * ArchiveStreamFactory and override the appropriate methods (and call their
045 * implementation from super of course).
046 * 
047 * Compressing a ZIP-File:
048 * 
049 * <pre>
050 * final OutputStream out = new FileOutputStream(output); 
051 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
052 * 
053 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
054 * IOUtils.copy(new FileInputStream(file1), os);
055 * os.closeArchiveEntry();
056 *
057 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
058 * IOUtils.copy(new FileInputStream(file2), os);
059 * os.closeArchiveEntry();
060 * os.close();
061 * </pre>
062 * 
063 * Decompressing a ZIP-File:
064 * 
065 * <pre>
066 * final InputStream is = new FileInputStream(input); 
067 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
068 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
069 * OutputStream out = new FileOutputStream(new File(dir, entry.getName()));
070 * IOUtils.copy(in, out);
071 * out.close();
072 * in.close();
073 * </pre>
074 * @Immutable provided that the deprecated method setEntryEncoding is not used.
075 * @ThreadSafe even if the deprecated method setEntryEncoding is used
076 */
077public class ArchiveStreamFactory {
078
079    /**
080     * Constant (value {@value}) used to identify the AR archive format.
081     * @since 1.1
082     */
083    public static final String AR = "ar";
084    /**
085     * Constant (value {@value}) used to identify the ARJ archive format.
086     * Not supported as an output stream type.
087     * @since 1.6
088     */
089    public static final String ARJ = "arj";
090    /**
091     * Constant (value {@value}) used to identify the CPIO archive format.
092     * @since 1.1
093     */
094    public static final String CPIO = "cpio";
095    /**
096     * Constant (value {@value}) used to identify the Unix DUMP archive format.
097     * Not supported as an output stream type.
098     * @since 1.3
099     */
100    public static final String DUMP = "dump";
101    /**
102     * Constant (value {@value}) used to identify the JAR archive format.
103     * @since 1.1
104     */
105    public static final String JAR = "jar";
106    /**
107     * Constant used to identify the TAR archive format.
108     * @since 1.1
109     */
110    public static final String TAR = "tar";
111    /**
112     * Constant (value {@value}) used to identify the ZIP archive format.
113     * @since 1.1
114     */
115    public static final String ZIP = "zip";
116    /**
117     * Constant (value {@value}) used to identify the 7z archive format.
118     * @since 1.8
119     */
120    public static final String SEVEN_Z = "7z";
121
122    /**
123     * Entry encoding, null for the platform default.
124     */
125    private final String encoding;
126
127    /**
128     * Entry encoding, null for the default.
129     */
130    private volatile String entryEncoding = null;
131
132    /**
133     * Create an instance using the platform default encoding.
134     */
135    public ArchiveStreamFactory() {
136        this(null);
137    }
138
139    /**
140     * Create an instance using the specified encoding.
141     *
142     * @param encoding the encoding to be used.
143     *
144     * @since 1.10
145     */
146    public ArchiveStreamFactory(String encoding) {
147        super();
148        this.encoding = encoding;
149        // Also set the original field so can continue to use it.
150        this.entryEncoding = encoding;
151    }
152
153    /**
154     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
155     * files, or null for the archiver default.
156     *
157     * @return entry encoding, or null for the archiver default
158     * @since 1.5
159     */
160    public String getEntryEncoding() {
161        return entryEncoding;
162    }
163
164    /**
165     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
166     * 
167     * @param entryEncoding the entry encoding, null uses the archiver default.
168     * @since 1.5
169     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
170     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 
171     * was used to specify the factory encoding.
172     */
173    @Deprecated
174    public void setEntryEncoding(String entryEncoding) {
175        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
176        if (encoding != null) {
177            throw new IllegalStateException("Cannot overide encoding set by the constructor");
178        }
179        this.entryEncoding = entryEncoding;
180    }
181
182    /**
183     * Create an archive input stream from an archiver name and an input stream.
184     * 
185     * @param archiverName the archive name,
186     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
187     * @param in the input stream
188     * @return the archive input stream
189     * @throws ArchiveException if the archiver name is not known
190     * @throws StreamingNotSupportedException if the format cannot be
191     * read from a stream
192     * @throws IllegalArgumentException if the archiver name or stream is null
193     */
194    public ArchiveInputStream createArchiveInputStream(
195            final String archiverName, final InputStream in)
196            throws ArchiveException {
197
198        if (archiverName == null) {
199            throw new IllegalArgumentException("Archivername must not be null.");
200        }
201
202        if (in == null) {
203            throw new IllegalArgumentException("InputStream must not be null.");
204        }
205
206        if (AR.equalsIgnoreCase(archiverName)) {
207            return new ArArchiveInputStream(in);
208        }
209        if (ARJ.equalsIgnoreCase(archiverName)) {
210            if (entryEncoding != null) {
211                return new ArjArchiveInputStream(in, entryEncoding);
212            } else {
213                return new ArjArchiveInputStream(in);
214            }
215        }
216        if (ZIP.equalsIgnoreCase(archiverName)) {
217            if (entryEncoding != null) {
218                return new ZipArchiveInputStream(in, entryEncoding);
219            } else {
220                return new ZipArchiveInputStream(in);
221            }
222        }
223        if (TAR.equalsIgnoreCase(archiverName)) {
224            if (entryEncoding != null) {
225                return new TarArchiveInputStream(in, entryEncoding);
226            } else {
227                return new TarArchiveInputStream(in);
228            }
229        }
230        if (JAR.equalsIgnoreCase(archiverName)) {
231            if (entryEncoding != null) {
232                return new JarArchiveInputStream(in, entryEncoding);
233            } else {
234                return new JarArchiveInputStream(in);
235            }
236        }
237        if (CPIO.equalsIgnoreCase(archiverName)) {
238            if (entryEncoding != null) {
239                return new CpioArchiveInputStream(in, entryEncoding);
240            } else {
241                return new CpioArchiveInputStream(in);
242            }
243        }
244        if (DUMP.equalsIgnoreCase(archiverName)) {
245            if (entryEncoding != null) {
246                return new DumpArchiveInputStream(in, entryEncoding);
247            } else {
248                return new DumpArchiveInputStream(in);
249            }
250        }
251        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
252            throw new StreamingNotSupportedException(SEVEN_Z);
253        }
254
255        throw new ArchiveException("Archiver: " + archiverName + " not found.");
256    }
257
258    /**
259     * Create an archive output stream from an archiver name and an output stream.
260     * 
261     * @param archiverName the archive name,
262     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 
263     * @param out the output stream
264     * @return the archive output stream
265     * @throws ArchiveException if the archiver name is not known
266     * @throws StreamingNotSupportedException if the format cannot be
267     * written to a stream
268     * @throws IllegalArgumentException if the archiver name or stream is null
269     */
270    public ArchiveOutputStream createArchiveOutputStream(
271            final String archiverName, final OutputStream out)
272            throws ArchiveException {
273        if (archiverName == null) {
274            throw new IllegalArgumentException("Archivername must not be null.");
275        }
276        if (out == null) {
277            throw new IllegalArgumentException("OutputStream must not be null.");
278        }
279
280        if (AR.equalsIgnoreCase(archiverName)) {
281            return new ArArchiveOutputStream(out);
282        }
283        if (ZIP.equalsIgnoreCase(archiverName)) {
284            ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
285            if (entryEncoding != null) {
286                zip.setEncoding(entryEncoding);
287            }
288            return zip;
289        }
290        if (TAR.equalsIgnoreCase(archiverName)) {
291            if (entryEncoding != null) {
292                return new TarArchiveOutputStream(out, entryEncoding);
293            } else {
294                return new TarArchiveOutputStream(out);
295            }
296        }
297        if (JAR.equalsIgnoreCase(archiverName)) {
298            if (entryEncoding != null) {
299                return new JarArchiveOutputStream(out, entryEncoding);
300            } else {
301                return new JarArchiveOutputStream(out);
302            }
303        }
304        if (CPIO.equalsIgnoreCase(archiverName)) {
305            if (entryEncoding != null) {
306                return new CpioArchiveOutputStream(out, entryEncoding);
307            } else {
308                return new CpioArchiveOutputStream(out);
309            }
310        }
311        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
312            throw new StreamingNotSupportedException(SEVEN_Z);
313        }
314        throw new ArchiveException("Archiver: " + archiverName + " not found.");
315    }
316
317    /**
318     * Create an archive input stream from an input stream, autodetecting
319     * the archive type from the first few bytes of the stream. The InputStream
320     * must support marks, like BufferedInputStream.
321     * 
322     * @param in the input stream
323     * @return the archive input stream
324     * @throws ArchiveException if the archiver name is not known
325     * @throws StreamingNotSupportedException if the format cannot be
326     * read from a stream
327     * @throws IllegalArgumentException if the stream is null or does not support mark
328     */
329    public ArchiveInputStream createArchiveInputStream(final InputStream in)
330            throws ArchiveException {
331        if (in == null) {
332            throw new IllegalArgumentException("Stream must not be null.");
333        }
334
335        if (!in.markSupported()) {
336            throw new IllegalArgumentException("Mark is not supported.");
337        }
338
339        final byte[] signature = new byte[12];
340        in.mark(signature.length);
341        try {
342            int signatureLength = IOUtils.readFully(in, signature);
343            in.reset();
344            if (ZipArchiveInputStream.matches(signature, signatureLength)) {
345                if (entryEncoding != null) {
346                    return new ZipArchiveInputStream(in, entryEncoding);
347                } else {
348                    return new ZipArchiveInputStream(in);
349                }
350            } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
351                if (entryEncoding != null) {
352                    return new JarArchiveInputStream(in, entryEncoding);
353                } else {
354                    return new JarArchiveInputStream(in);
355                }
356            } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
357                return new ArArchiveInputStream(in);
358            } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
359                if (entryEncoding != null) {
360                    return new CpioArchiveInputStream(in, entryEncoding);
361                } else {
362                    return new CpioArchiveInputStream(in);
363                }
364            } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
365                if (entryEncoding != null) {
366                    return new ArjArchiveInputStream(in, entryEncoding);
367                } else {
368                    return new ArjArchiveInputStream(in);
369                }
370            } else if (SevenZFile.matches(signature, signatureLength)) {
371                throw new StreamingNotSupportedException(SEVEN_Z);
372            }
373
374            // Dump needs a bigger buffer to check the signature;
375            final byte[] dumpsig = new byte[32];
376            in.mark(dumpsig.length);
377            signatureLength = IOUtils.readFully(in, dumpsig);
378            in.reset();
379            if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
380                return new DumpArchiveInputStream(in, entryEncoding);
381            }
382
383            // Tar needs an even bigger buffer to check the signature; read the first block
384            final byte[] tarheader = new byte[512];
385            in.mark(tarheader.length);
386            signatureLength = IOUtils.readFully(in, tarheader);
387            in.reset();
388            if (TarArchiveInputStream.matches(tarheader, signatureLength)) {
389                return new TarArchiveInputStream(in, entryEncoding);
390            }
391            // COMPRESS-117 - improve auto-recognition
392            if (signatureLength >= 512) {
393                TarArchiveInputStream tais = null;
394                try {
395                    tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader));
396                    // COMPRESS-191 - verify the header checksum
397                    if (tais.getNextTarEntry().isCheckSumOK()) {
398                        return new TarArchiveInputStream(in, encoding);
399                    }
400                } catch (Exception e) { // NOPMD
401                    // can generate IllegalArgumentException as well
402                    // as IOException
403                    // autodetection, simply not a TAR
404                    // ignored
405                } finally {
406                    IOUtils.closeQuietly(tais);
407                }
408            }
409        } catch (IOException e) {
410            throw new ArchiveException("Could not use reset and mark operations.", e);
411        }
412
413        throw new ArchiveException("No Archiver found for the stream signature");
414    }
415
416}