001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BitInputStream;
027
028/**
029 * <p>Generic LZW implementation. It is used internally for
030 * the Z decompressor and the Unshrinking Zip file compression method,
031 * but may be useful for third-party projects in implementing their own LZW variations.</p>
032 *
033 * @NotThreadSafe
034 * @since 1.10
035 */
036public abstract class LZWInputStream extends CompressorInputStream {
037    private final byte[] oneByte = new byte[1];
038
039    protected final BitInputStream in;
040    protected int clearCode = -1;
041    protected int codeSize = 9;
042    protected byte previousCodeFirstChar;
043    protected int previousCode = -1;
044    protected int tableSize = 0;
045    protected int[] prefixes;
046    protected byte[] characters;
047    private byte[] outputStack;
048    private int outputStackLocation;
049
050    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
051        this.in = new BitInputStream(inputStream, byteOrder);
052    }
053
054    @Override
055    public void close() throws IOException {
056        in.close();
057    }
058    
059    @Override
060    public int read() throws IOException {
061        int ret = read(oneByte);
062        if (ret < 0) {
063            return ret;
064        }
065        return 0xff & oneByte[0];
066    }
067    
068    @Override
069    public int read(byte[] b, int off, int len) throws IOException {
070        int bytesRead = readFromStack(b, off, len);
071        while (len - bytesRead > 0) {
072            int result = decompressNextSymbol();
073            if (result < 0) {
074                if (bytesRead > 0) {
075                    count(bytesRead);
076                    return bytesRead;
077                }
078                return result;
079            }
080            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
081        }
082        count(bytesRead);
083        return bytesRead;
084    }
085
086    /**
087     * Read the next code and expand it.
088     */
089    protected abstract int decompressNextSymbol() throws IOException;
090
091    /**
092     * Add a new entry to the dictionary.
093     */
094    protected abstract int addEntry(int previousCode, byte character)
095        throws IOException;
096
097    /**
098     * Sets the clear code based on the code size.
099     */
100    protected void setClearCode(int codeSize) {
101        clearCode = (1 << (codeSize - 1));
102    }
103
104    /**
105     * Initializes the arrays based on the maximum code size.
106     */
107    protected void initializeTables(int maxCodeSize) {
108        final int maxTableSize = 1 << maxCodeSize;
109        prefixes = new int[maxTableSize];
110        characters = new byte[maxTableSize];
111        outputStack = new byte[maxTableSize];
112        outputStackLocation = maxTableSize;
113        final int max = 1 << 8;
114        for (int i = 0; i < max; i++) {
115            prefixes[i] = -1;
116            characters[i] = (byte) i;
117        }
118    }
119
120    /**
121     * Reads the next code from the stream.
122     */
123    protected int readNextCode() throws IOException {
124        if (codeSize > 31) {
125            throw new IllegalArgumentException("code size must not be bigger than 31");
126        }
127        return (int) in.readBits(codeSize);
128    }
129    
130    /**
131     * Adds a new entry if the maximum table size hasn't been exceeded
132     * and returns the new index.
133     */
134    protected int addEntry(int previousCode, byte character, int maxTableSize) {
135        if (tableSize < maxTableSize) {
136            prefixes[tableSize] = previousCode;
137            characters[tableSize] = character;
138            return tableSize++;
139        }
140        return -1;
141    }
142
143    /**
144     * Add entry for repeat of previousCode we haven't added, yet.
145     */
146    protected int addRepeatOfPreviousCode() throws IOException {
147        if (previousCode == -1) {
148            // can't have a repeat for the very first code
149            throw new IOException("The first code can't be a reference to its preceding code");
150        }
151        return addEntry(previousCode, previousCodeFirstChar);
152    }
153
154    /**
155     * Expands the entry with index code to the output stack and may
156     * create a new entry
157     */
158    protected int expandCodeToOutputStack(int code, boolean addedUnfinishedEntry)
159        throws IOException {
160        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
161            outputStack[--outputStackLocation] = characters[entry];
162        }
163        if (previousCode != -1 && !addedUnfinishedEntry) {
164            addEntry(previousCode, outputStack[outputStackLocation]);
165        }
166        previousCode = code;
167        previousCodeFirstChar = outputStack[outputStackLocation];
168        return outputStackLocation;
169    }
170
171    private int readFromStack(byte[] b, int off, int len) {
172        int remainingInStack = outputStack.length - outputStackLocation;
173        if (remainingInStack > 0) {
174            int maxLength = Math.min(remainingInStack, len);
175            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
176            outputStackLocation += maxLength;
177            return maxLength;
178        }
179        return 0;
180    }
181}