001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.filefilter;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.Serializable;
022import java.nio.ByteBuffer;
023import java.nio.channels.FileChannel;
024import java.nio.charset.Charset;
025import java.nio.file.FileVisitResult;
026import java.nio.file.Files;
027import java.nio.file.Path;
028import java.nio.file.attribute.BasicFileAttributes;
029import java.util.Arrays;
030import java.util.Objects;
031
032import org.apache.commons.io.RandomAccessFileMode;
033import org.apache.commons.io.RandomAccessFiles;
034
035/**
036 * <p>
037 * File filter for matching files containing a "magic number". A magic number
038 * is a unique series of bytes common to all files of a specific file format.
039 * For instance, all Java class files begin with the bytes
040 * {@code 0xCAFEBABE}.
041 * </p>
042 * <h2>Using Classic IO</h2>
043 * <pre>
044 * File dir = FileUtils.current();
045 * MagicNumberFileFilter javaClassFileFilter =
046 *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
047 *       (byte) 0xBA, (byte) 0xBE});
048 * String[] javaClassFiles = dir.list(javaClassFileFilter);
049 * for (String javaClassFile : javaClassFiles) {
050 *     System.out.println(javaClassFile);
051 * }
052 * </pre>
053 *
054 * <p>
055 * Sometimes, such as in the case of TAR files, the
056 * magic number will be offset by a certain number of bytes in the file. In the
057 * case of TAR archive files, this offset is 257 bytes.
058 * </p>
059 *
060 * <pre>
061 * File dir = FileUtils.current();
062 * MagicNumberFileFilter tarFileFilter =
063 *     MagicNumberFileFilter("ustar", 257);
064 * String[] tarFiles = dir.list(tarFileFilter);
065 * for (String tarFile : tarFiles) {
066 *     System.out.println(tarFile);
067 * }
068 * </pre>
069 * <h2>Using NIO</h2>
070 * <pre>
071 * final Path dir = PathUtils.current();
072 * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(MagicNumberFileFilter("ustar", 257));
073 * //
074 * // Walk one directory
075 * Files.<strong>walkFileTree</strong>(dir, Collections.emptySet(), 1, visitor);
076 * System.out.println(visitor.getPathCounters());
077 * System.out.println(visitor.getFileList());
078 * //
079 * visitor.getPathCounters().reset();
080 * //
081 * // Walk directory tree
082 * Files.<strong>walkFileTree</strong>(dir, visitor);
083 * System.out.println(visitor.getPathCounters());
084 * System.out.println(visitor.getDirList());
085 * System.out.println(visitor.getFileList());
086 * </pre>
087 * <h2>Deprecating Serialization</h2>
088 * <p>
089 * <em>Serialization is deprecated and will be removed in 3.0.</em>
090 * </p>
091 *
092 * <h2>Deprecating Serialization</h2>
093 * <p>
094 * <em>Serialization is deprecated and will be removed in 3.0.</em>
095 * </p>
096 *
097 * @since 2.0
098 * @see FileFilterUtils#magicNumberFileFilter(byte[])
099 * @see FileFilterUtils#magicNumberFileFilter(String)
100 * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
101 * @see FileFilterUtils#magicNumberFileFilter(String, long)
102 */
103public class MagicNumberFileFilter extends AbstractFileFilter implements Serializable {
104
105    /**
106     * The serialization version unique identifier.
107     */
108    private static final long serialVersionUID = -547733176983104172L;
109
110    /**
111     * The magic number to compare against the file's bytes at the provided
112     * offset.
113     */
114    private final byte[] magicNumbers;
115
116    /**
117     * The offset (in bytes) within the files that the magic number's bytes
118     * should appear.
119     */
120    private final long byteOffset;
121
122    /**
123     * <p>
124     * Constructs a new MagicNumberFileFilter and associates it with the magic
125     * number to test for in files. This constructor assumes a starting offset
126     * of {@code 0}.
127     * </p>
128     *
129     * <p>
130     * It is important to note that <em>the array is not cloned</em> and that
131     * any changes to the magic number array after construction will affect the
132     * behavior of this file filter.
133     * </p>
134     *
135     * <pre>
136     * MagicNumberFileFilter javaClassFileFilter =
137     *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
138     *       (byte) 0xBA, (byte) 0xBE});
139     * </pre>
140     *
141     * @param magicNumber the magic number to look for in the file.
142     * @throws IllegalArgumentException if {@code magicNumber} is
143     *         {@code null}, or contains no bytes.
144     */
145    public MagicNumberFileFilter(final byte[] magicNumber) {
146        this(magicNumber, 0);
147    }
148
149    /**
150     * <p>
151     * Constructs a new MagicNumberFileFilter and associates it with the magic
152     * number to test for in files and the byte offset location in the file to
153     * to look for that magic number.
154     * </p>
155     *
156     * <pre>
157     * MagicNumberFileFilter tarFileFilter =
158     *     MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
159     * </pre>
160     *
161     * <pre>
162     * MagicNumberFileFilter javaClassFileFilter =
163     *     MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
164     * </pre>
165     *
166     * @param magicNumbers the magic number to look for in the file.
167     * @param offset the byte offset in the file to start comparing bytes.
168     * @throws IllegalArgumentException if {@code magicNumber}
169     *         contains no bytes, or {@code offset}
170     *         is a negative number.
171     */
172    public MagicNumberFileFilter(final byte[] magicNumbers, final long offset) {
173        Objects.requireNonNull(magicNumbers, "magicNumbers");
174        if (magicNumbers.length == 0) {
175            throw new IllegalArgumentException("The magic number must contain at least one byte");
176        }
177        if (offset < 0) {
178            throw new IllegalArgumentException("The offset cannot be negative");
179        }
180
181        this.magicNumbers = magicNumbers.clone();
182        this.byteOffset = offset;
183    }
184
185    /**
186     * <p>
187     * Constructs a new MagicNumberFileFilter and associates it with the magic
188     * number to test for in files. This constructor assumes a starting offset
189     * of {@code 0}.
190     * </p>
191     *
192     * Example usage:
193     * <pre>
194     * {@code
195     * MagicNumberFileFilter xmlFileFilter =
196     *     MagicNumberFileFilter("<?xml");
197     * }
198     * </pre>
199     *
200     * @param magicNumber the magic number to look for in the file.
201     *        The string is converted to bytes using the platform default charset.
202     *
203     * @throws IllegalArgumentException if {@code magicNumber} is
204     *         {@code null} or the empty String.
205     */
206    public MagicNumberFileFilter(final String magicNumber) {
207        this(magicNumber, 0);
208    }
209
210    /**
211     * <p>
212     * Constructs a new MagicNumberFileFilter and associates it with the magic
213     * number to test for in files and the byte offset location in the file to
214     * to look for that magic number.
215     * </p>
216     *
217     * <pre>
218     * MagicNumberFileFilter tarFileFilter = MagicNumberFileFilter("ustar", 257);
219     * </pre>
220     * <p>
221     * This method uses the virtual machine's {@link Charset#defaultCharset() default charset}.
222     * </p>
223     *
224     * @param magicNumber the magic number to look for in the file.
225     *        The string is converted to bytes using the platform default charset.
226     * @param offset the byte offset in the file to start comparing bytes.
227     * @throws IllegalArgumentException if {@code magicNumber} is
228     *         the empty String, or {@code offset} is
229     *         a negative number.
230     */
231    public MagicNumberFileFilter(final String magicNumber, final long offset) {
232        this(magicNumber.getBytes(Charset.defaultCharset()), offset);
233    }
234
235    /**
236     * <p>
237     * Accepts the provided file if the file contains the file filter's magic
238     * number at the specified offset.
239     * </p>
240     *
241     * <p>
242     * If any {@link IOException}s occur while reading the file, the file will
243     * be rejected.
244     * </p>
245     *
246     * @param file the file to accept or reject.
247     * @return {@code true} if the file contains the filter's magic number
248     *         at the specified offset, {@code false} otherwise.
249     */
250    @Override
251    public boolean accept(final File file) {
252        if (isFile(file) && file.canRead()) {
253            try {
254                return RandomAccessFileMode.READ_ONLY.apply(file.toPath(),
255                        raf -> Arrays.equals(magicNumbers, RandomAccessFiles.read(raf, byteOffset, magicNumbers.length)));
256            } catch (final IOException ignored) {
257                // Do nothing, fall through and do not accept file
258            }
259        }
260        return false;
261    }
262
263    /**
264     * <p>
265     * Accepts the provided file if the file contains the file filter's magic
266     * number at the specified offset.
267     * </p>
268     * <p>
269     * If any {@link IOException}s occur while reading the file, the file will
270     * be rejected.
271     *
272     * </p>
273     * @param file the file to accept or reject.
274     * @param attributes the path's basic attributes (may be null).
275     * @return {@code true} if the file contains the filter's magic number
276     *         at the specified offset, {@code false} otherwise.
277     * @since 2.9.0
278     */
279    @Override
280    public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
281        if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
282            try {
283                try (FileChannel fileChannel = FileChannel.open(file)) {
284                    final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
285                    fileChannel.position(byteOffset);
286                    final int read = fileChannel.read(byteBuffer);
287                    if (read != magicNumbers.length) {
288                        return FileVisitResult.TERMINATE;
289                    }
290                    return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()));
291                }
292            } catch (final IOException ignored) {
293                // Do nothing, fall through and do not accept file
294            }
295        }
296        return FileVisitResult.TERMINATE;
297    }
298
299    /**
300     * Returns a String representation of the file filter, which includes the
301     * magic number bytes and byte offset.
302     *
303     * @return a String representation of the file filter.
304     */
305    @Override
306    public String toString() {
307        final StringBuilder builder = new StringBuilder(super.toString());
308        builder.append("(");
309        // TODO perhaps use hex if value is not printable
310        builder.append(new String(magicNumbers, Charset.defaultCharset()));
311        builder.append(",");
312        builder.append(this.byteOffset);
313        builder.append(")");
314        return builder.toString();
315    }
316}