001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.archivers.zip;
020
021import org.apache.commons.compress.archivers.ArchiveStreamFactory;
022import org.apache.commons.compress.utils.FileNameUtils;
023import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
024
025import java.io.File;
026import java.io.IOException;
027import java.io.Serializable;
028import java.nio.ByteBuffer;
029import java.nio.channels.SeekableByteChannel;
030import java.nio.file.Files;
031import java.nio.file.StandardOpenOption;
032import java.util.ArrayList;
033import java.util.Arrays;
034import java.util.Comparator;
035import java.util.List;
036import java.util.Objects;
037import java.util.regex.Pattern;
038
039/**
040 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
041 *
042 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of
043 * the archive.</p>
044 *
045 * @since 1.20
046 */
047public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
048    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
049    private final ByteBuffer zipSplitSignatureByteBuffer =
050        ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
051
052    /**
053     * Concatenates the given channels.
054     *
055     * <p>The channels should be add in ascending order, e.g. z01,
056     * z02, ... z99, zip please note that the .zip file is the last
057     * segment and should be added as the last one in the channels</p>
058     *
059     * @param channels the channels to concatenate
060     * @throws NullPointerException if channels is null
061     * @throws IOException if the first channel doesn't seem to hold
062     * the beginning of a split archive
063     */
064    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels)
065        throws IOException {
066        super(channels);
067
068        // the first split zip segment should begin with zip split signature
069        assertSplitSignature(channels);
070    }
071
072    /**
073     * Based on the zip specification:
074     *
075     * <p>
076     * 8.5.3 Spanned/Split archives created using PKZIP for Windows
077     * (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
078     * or PKZIP Explorer will include a special spanning
079     * signature as the first 4 bytes of the first segment of
080     * the archive.  This signature (0x08074b50) will be
081     * followed immediately by the local header signature for
082     * the first file in the archive.
083     *
084     * <p>
085     * the first 4 bytes of the first zip split segment should be the zip split signature(0x08074B50)
086     *
087     * @param channels channels to be validated
088     * @throws IOException
089     */
090    private void assertSplitSignature(final List<SeekableByteChannel> channels)
091        throws IOException {
092        final SeekableByteChannel channel = channels.get(0);
093        // the zip split file signature is at the beginning of the first split segment
094        channel.position(0L);
095
096        zipSplitSignatureByteBuffer.rewind();
097        channel.read(zipSplitSignatureByteBuffer);
098        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
099        if (!signature.equals(ZipLong.DD_SIG)) {
100            channel.position(0L);
101            throw new IOException("The first zip split segment does not begin with split zip file signature");
102        }
103
104        channel.position(0L);
105    }
106
107    /**
108     * Concatenates the given channels.
109     *
110     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip)
111     *                 and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip)
112     * @return SeekableByteChannel that concatenates all provided channels
113     * @throws NullPointerException if channels is null
114     * @throws IOException if reading channels fails
115     */
116    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
117        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
118            return channels[0];
119        }
120        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
121    }
122
123    /**
124     * Concatenates the given channels.
125     *
126     * @param lastSegmentChannel channel of the last segment of split zip segments, its extension should be .zip
127     * @param channels           the channels to concatenate except for the last segment,
128     *                           note theses channels should be added in correct order (e.g. .z01, .z02... .z99)
129     * @return SeekableByteChannel that concatenates all provided channels
130     * @throws NullPointerException if lastSegmentChannel or channels is null
131     * @throws IOException if the first channel doesn't seem to hold
132     * the beginning of a split archive
133     */
134    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel,
135        final Iterable<SeekableByteChannel> channels) throws IOException {
136        Objects.requireNonNull(channels, "channels");
137        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
138
139        final List<SeekableByteChannel> channelsList = new ArrayList<>();
140        for (final SeekableByteChannel channel : channels) {
141            channelsList.add(channel);
142        }
143        channelsList.add(lastSegmentChannel);
144
145        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
146    }
147
148    /**
149     * Concatenates zip split files from the last segment(the extension SHOULD be .zip)
150     *
151     * @param lastSegmentFile the last segment of zip split files, note that the extension SHOULD be .zip
152     * @return SeekableByteChannel that concatenates all zip split files
153     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
154     * @throws IOException if the first channel doesn't seem to hold
155     * the beginning of a split archive
156     */
157    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
158        final String extension = FileNameUtils.getExtension(lastSegmentFile.getCanonicalPath());
159        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
160            throw new IllegalArgumentException("The extension of last zip split segment should be .zip");
161        }
162
163        final File parent = lastSegmentFile.getParentFile();
164        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentFile.getCanonicalPath());
165        final ArrayList<File> splitZipSegments = new ArrayList<>();
166
167        // zip split segments should be like z01,z02....z(n-1) based on the zip specification
168        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
169        final File[] children = parent.listFiles();
170        if (children != null) {
171            for (final File file : children) {
172                if (!pattern.matcher(file.getName()).matches()) {
173                    continue;
174                }
175
176                splitZipSegments.add(file);
177            }
178        }
179
180        splitZipSegments.sort(new ZipSplitSegmentComparator());
181        return forFiles(lastSegmentFile, splitZipSegments);
182    }
183
184    /**
185     * Concatenates the given files.
186     *
187     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
188     *              and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip)
189     * @return SeekableByteChannel that concatenates all provided files
190     * @throws NullPointerException if files is null
191     * @throws IOException          if opening a channel for one of the files fails
192     * @throws IOException if the first channel doesn't seem to hold
193     * the beginning of a split archive
194     */
195    public static SeekableByteChannel forFiles(final File... files) throws IOException {
196        final List<SeekableByteChannel> channels = new ArrayList<>();
197        for (final File f : Objects.requireNonNull(files, "files must not be null")) {
198            channels.add(Files.newByteChannel(f.toPath(), StandardOpenOption.READ));
199        }
200        if (channels.size() == 1) {
201            return channels.get(0);
202        }
203        return new ZipSplitReadOnlySeekableByteChannel(channels);
204    }
205
206    /**
207     * Concatenates the given files.
208     *
209     * @param lastSegmentFile the last segment of split zip segments, its extension should be .zip
210     * @param files           the files to concatenate except for the last segment,
211     *                        note theses files should be added in correct order (e.g. .z01, .z02... .z99)
212     * @return SeekableByteChannel that concatenates all provided files
213     * @throws IOException if the first channel doesn't seem to hold
214     * the beginning of a split archive
215     * @throws NullPointerException if files or lastSegmentFile is null
216     */
217    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
218        Objects.requireNonNull(files, "files");
219        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
220
221        final List<File> filesList = new ArrayList<>();
222        for (final File f : files) {
223            filesList.add(f);
224        }
225        filesList.add(lastSegmentFile);
226
227        return forFiles(filesList.toArray(new File[0]));
228    }
229
230    private static class ZipSplitSegmentComparator implements Comparator<File>, Serializable {
231        private static final long serialVersionUID = 20200123L;
232        @Override
233        public int compare(final File file1, final File file2) {
234            final String extension1 = FileNameUtils.getExtension(file1.getPath());
235            final String extension2 = FileNameUtils.getExtension(file2.getPath());
236
237            if (!extension1.startsWith("z")) {
238                return -1;
239            }
240
241            if (!extension2.startsWith("z")) {
242                return 1;
243            }
244
245            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
246            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
247
248            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
249        }
250    }
251}