001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.InputStreamReader;
022import java.io.OutputStream;
023import java.io.Reader;
024import java.io.UncheckedIOException;
025import java.io.UnsupportedEncodingException;
026import java.nio.charset.Charset;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.CopyOption;
029import java.nio.file.Files;
030import java.nio.file.InvalidPathException;
031import java.nio.file.Path;
032import java.nio.file.Paths;
033import java.nio.file.StandardCopyOption;
034import java.util.UUID;
035import java.util.concurrent.atomic.AtomicInteger;
036import java.util.function.Supplier;
037
038import org.apache.commons.fileupload2.core.DeferrableOutputStream.Listener;
039import org.apache.commons.fileupload2.core.DeferrableOutputStream.State;
040import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
041import org.apache.commons.io.Charsets;
042import org.apache.commons.io.FileCleaningTracker;
043import org.apache.commons.io.build.AbstractOrigin;
044import org.apache.commons.io.file.PathUtils;
045
046
047/**
048 * The default implementation of the {@link FileItem FileItem} interface.
049 *
050 * <p>After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see
051 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload
052 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come in
 * handy with large files.</p>
055 *
 * <p><em>State model</em>: Instances of {@link DiskFileItem} are subject to a carefully designed state model.
 * Depending on the {@link #getThreshold() threshold}, exactly one of the following three models applies:</p>
058 * <ol>
059 *   <li><em>threshold = -1</em>
060 *     Uploaded data is never kept in memory. Instead, a temporary file is being created immediately.
061 *
062 *     {@link #isInMemory()} will always return false, {@link #getPath()} will always return the path
063 *     of an existing file. The temporary file may be empty.</li>
064 *   <li><em>threshold = 0</em>
065 *     Uploaded data is never kept in memory. (Same as threshold=-1.) However, the temporary file is
066 *     only created, if data was uploaded. Or, in other words: The uploaded file will never be
067 *     empty.
068 *
069 *     {@link #isInMemory()} will return true, if no data was uploaded, otherwise it will be false.
070 *     In the former case {@link #getPath()} will return null, but in the latter case it returns
071 *     the path of an existing, non-empty file.</li>
072 *   <li><em>threshold &gt; 0</em>
073 *     Uploaded data will be kept in memory, if the size is below the threshold. If the size
074 *     is equal to, or above the threshold, then a temporary file has been created, and all
075 *     uploaded data has been transferred to that file.
076 *
077 *     {@link #isInMemory()} returns true, if the size of the uploaded data is below the threshold.
078 *     If so, {@link #getPath()} returns null. Otherwise, {@link #isInMemory()} returns false,
079 *     and {@link #getPath()} returns the path of an existing, temporary file. The size
080 *     of the temporary file is equal to, or above the threshold.</li>
081 * </ol>
082 *
083 * <p>Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
084 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
085 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
086 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
087 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
088 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.</p>
089 */
090public final class DiskFileItem implements FileItem<DiskFileItem> {
091
092    /**
093     * Builds a new {@link DiskFileItem} instance.
094     * <p>
095     * For example:
096     * </p>
097     *
098     * <pre>{@code
099     * final FileItem fileItem = fileItemFactory.fileItemBuilder()
100     *   .setFieldName("FieldName")
101     *   .setContentType("ContentType")
102     *   .setFormField(true)
103     *   .setFileName("FileName")
104     *   .setFileItemHeaders(...)
105     *   .get();
106     * }
107     * </pre>
108     */
109    public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> {
110
111        /**
112         * The threshold. We do maintain this separate from the {@link #getBufferSize()},
113         * because the parent class might change the value in {@link #setBufferSize(int)}.
114         */
115        private int threshold;
116
117        /**
118         * Constructs a new instance.
119         */
120        public Builder() {
121            setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD);
122            setPath(PathUtils.getTempDirectory());
123            setCharset(DEFAULT_CHARSET);
124            setCharsetDefault(DEFAULT_CHARSET);
125        }
126
127        /**
128         * Constructs a new instance.
129         * <p>
130         * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
131         * {@link UnsupportedOperationException}.
132         * </p>
133         *
134         * @return a new instance.
135         * @throws UnsupportedOperationException if the origin cannot provide a Path.
136         * @see AbstractOrigin#getReader(Charset)
137         */
138        @Override
139        public DiskFileItem get() {
140            final var diskFileItem = new DiskFileItem(this);
141            final var tracker = getFileCleaningTracker();
142            if (tracker != null) {
143                diskFileItem.setFileCleaningTracker(tracker);
144            }
145            return diskFileItem;
146        }
147
148        /**
149         * Equivalent to {@link #getThreshold()}.
150         * @return The threshold, which is being used.
151         * @see #getThreshold()
152         * @deprecated Since 2.0.0, use {@link #getThreshold()} instead.
153         */
154        public int getBufferSize() {
155            return getThreshold();
156        }
157
158        /**
159         * Returns the threshold.
160         * @return The threshold.
161         */
162        public int getThreshold() {
163            return threshold;
164        }
165
166        /**
167         * Equivalent to {@link #setThreshold(int)}.
168         * @param bufferSize The threshold, which is being used.
169         * @see #setThreshold(int)
170         * @return This builder.
171         * @deprecated Since 2.0.0, use {@link #setThreshold(int)} instead.
172         */
173        @Override
174        public Builder setBufferSize(final int bufferSize) {
175            return setThreshold(bufferSize);
176        }
177
178        /**
179         * Sets the threshold. The uploaded data is typically kept in memory, until
180         * a certain number of bytes (the threshold) is reached. At this point, the
181         * incoming data is transferred to a temporary file, and the in-memory data
182         * is removed.
183         * @param threshold The threshold, which is being used.
184         * @return This builder.
185         */
186        public Builder setThreshold(final int threshold) {
187            this.threshold = threshold;
188            return this;
189        }
190    }
191
    /**
     * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a
     * default charset value of "ISO-8859-1" when received via HTTP.
     */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;

    /**
     * UID used in unique file name generation: a random UUID, computed once when this class is initialized, with all dashes replaced by underscores.
     * It forms the first variable part of the temporary file names ({@code upload_<UID>_<id>.tmp}).
     */
    private static final String UID = UUID.randomUUID().toString().replace('-', '_');

    /**
     * Counter used in unique identifier generation. It is incremented once per generated identifier, and shared by all instances of this class.
     */
    private static final AtomicInteger COUNTER = new AtomicInteger();
207
    /**
     * Constructs a new {@link Builder}. This is a convenience factory, equivalent to invoking the public no-argument constructor of {@link Builder}.
     *
     * @return a new {@link Builder}, never {@code null}.
     */
    public static Builder builder() {
        return new Builder();
    }
216
217    /**
218     * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any
219     * modifications. Otherwise, throw an {@link InvalidPathException}.
220     *
221     * @param fileName The file name to check
222     * @return Unmodified file name, if valid.
223     * @throws InvalidPathException The file name is invalid.
224     */
225    public static String checkFileName(final String fileName) {
226        if (fileName != null) {
227            // Specific NUL check to build a better exception message.
228            final var indexOf0 = fileName.indexOf(0);
229            if (indexOf0 != -1) {
230                final var sb = new StringBuilder();
231                for (var i = 0; i < fileName.length(); i++) {
232                    final var c = fileName.charAt(i);
233                    if (c == 0) {
234                        sb.append("\\0");
235                    } else {
236                        sb.append(c);
237                    }
238                }
239                throw new InvalidPathException(fileName, sb.toString(), indexOf0);
240            }
241            // Throws InvalidPathException on invalid file names
242            Paths.get(fileName);
243        }
244        return fileName;
245    }
246
247    /**
248     * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance.
249     *
250     * @return A String with the non-random looking instance identifier.
251     */
252    private static String getUniqueId() {
253        final var limit = 100_000_000;
254        final var current = COUNTER.getAndIncrement();
255        var id = Integer.toString(current);
256
257        // If you manage to get more than 100 million of ids, you'll
258        // start getting ids longer than 8 characters.
259        if (current < limit) {
260            id = ("00000000" + id).substring(id.length());
261        }
262        return id;
263    }
264
    /**
     * The name of the form field as provided by the browser. Can be replaced later on through {@link #setFieldName(String)}.
     */
    private String fieldName;

    /**
     * The content type passed by the browser, or {@code null} if not defined.
     */
    private final String contentType;

    /**
     * Whether or not this item is a simple form field. Can be replaced later on through {@link #setFormField(boolean)}.
     */
    private volatile boolean isFormField;

    /**
     * The original file name in the user's file system. Stored as received from the builder; validation happens when the name is requested through
     * {@link #getName()}.
     */
    private final String fileName;

    /**
     * The threshold above which uploads will be stored on disk. It is passed on to the output stream, which is created by {@link #getOutputStream()}.
     */
    private final int threshold;

    /**
     * The directory in which uploaded files will be stored, if stored on disk, never null.
     */
    private final Path repository;

    /**
     * Output stream for this item. Remains {@code null}, until {@link #getOutputStream()} has been invoked for the first time.
     */
    private DeferrableOutputStream dos;

    /**
     * The file items headers.
     */
    private FileItemHeaders fileItemHeaders;

    /**
     * Default content Charset to be used when no explicit Charset parameter is provided by the sender. The initial value given here is replaced
     * by the constructor with the charset of the builder.
     */
    private Charset charsetDefault = DEFAULT_CHARSET;

    /**
     * The {@link FileCleaningTracker}, which is being used to remove
     * temporary files. May be {@code null}, in which case no tracking takes place.
     */
    private FileCleaningTracker fileCleaningTracker;
315
316    /**
317     * Constructs a new {@code DiskFileItem} instance.
318     *
319     * @param builder The DiskFileItem builder.
320     */
321    private DiskFileItem(final Builder builder) {
322        this.fieldName = builder.getFieldName();
323        this.contentType = builder.getContentType();
324        this.charsetDefault = builder.getCharset();
325        this.isFormField = builder.isFormField();
326        this.fileName = builder.getFileName();
327        this.fileItemHeaders = builder.getFileItemHeaders();
328        this.threshold = builder.getThreshold();
329        this.repository = builder.getPath() != null ? builder.getPath() : PathUtils.getTempDirectory();
330    }
331
332    /**
333     * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is
334     * done at an earlier time, thus preserving system resources.
335     *
336     * @throws IOException if an error occurs.
337     */
338    @Override
339    public DiskFileItem delete() throws IOException {
340        if (dos != null) {
341            final Path path = dos.getPath();
342            if (path != null) {
343                Files.deleteIfExists(path);
344            }
345        }
346        return this;
347    }
348
349    /**
350     * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage
351     * and cached.
352     *
353     * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read.
354     * @throws IOException if an I/O error occurs.
355     * @throws OutOfMemoryError     See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger
356     *                              than {@code 2GB}. If so, you should use {@link #getInputStream()}.
357     * @see #getInputStream()
358     * @deprecated Since 2.0.0, use {@link #getInputStream()}, or {@link #getReader()}, instead.
359     */
360    @Override
361    public byte[] get() throws IOException {
362        if (dos != null) {
363            final byte[] bytes = dos.getBytes();
364            if (bytes != null) {
365                return bytes;
366            }
367            final Path path = dos.getPath();
368            if (path != null  &&  dos.getState() == State.closed) {
369                return Files.readAllBytes(path);
370            }
371        }
372        return null;
373    }
374
375    /**
376     * Gets the content charset passed by the agent or {@code null} if not defined.
377     *
378     * @return The content charset passed by the agent or {@code null} if not defined.
379     */
380    public Charset getCharset() {
381        final var parser = new ParameterParser();
382        parser.setLowerCaseNames(true);
383        // Parameter parser can handle null input
384        final var params = parser.parse(getContentType(), ';');
385        return Charsets.toCharset(params.get("charset"), charsetDefault);
386    }
387
    /**
     * Gets the default charset for use when no explicit charset parameter is provided by the sender.
     *
     * @return the default charset, as taken from the builder, or as set through {@link #setCharsetDefault(Charset)}.
     * @see #setCharsetDefault(Charset)
     */
    public Charset getCharsetDefault() {
        return charsetDefault;
    }
396
    /**
     * Gets the content type passed by the agent or {@code null} if not defined. The value is returned exactly as it was given to the builder,
     * including parameters, if any.
     *
     * @return The content type passed by the agent or {@code null} if not defined.
     */
    @Override
    public String getContentType() {
        return contentType;
    }
406
    /**
     * Gets the name of the field in the multipart form corresponding to this file item.
     *
     * @return The name of the form field, as taken from the builder, or as set through {@link #setFieldName(String)}.
     * @see #setFieldName(String)
     */
    @Override
    public String getFieldName() {
        return fieldName;
    }
417
    /**
     * Returns the {@link FileCleaningTracker}, which is being used to remove
     * temporary files.
     * @return The {@link FileCleaningTracker}, which is being used to remove
     * temporary files, or {@code null}, if no tracker has been set.
     * @see #setFileCleaningTracker(FileCleaningTracker)
     */
    public FileCleaningTracker getFileCleaningTracker() {
        return fileCleaningTracker;
    }
427
    /**
     * Gets the file item headers.
     *
     * @return The file items headers, as taken from the builder, or as set through {@link #setHeaders(FileItemHeaders)}.
     * @see #setHeaders(FileItemHeaders)
     */
    @Override
    public FileItemHeaders getHeaders() {
        return fileItemHeaders;
    }
437
438    /**
439     * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
440     *
441     * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
442     * @throws IOException if an error occurs.
443     */
444    @Override
445    public InputStream getInputStream() throws IOException {
446        if (dos != null  &&  dos.getState() == State.closed) {
447            return dos.getInputStream();
448        }
449        throw new IllegalStateException("The file item has not been fully read.");
450    }
451
    /**
     * Gets the original file name in the client's file system. The stored name is validated by {@link #checkFileName(String)} on every invocation
     * of this method; a {@code null} name is returned as {@code null}.
     *
     * @return The original file name in the client's file system.
     * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name
     *                              anyways, catch the exception and use {@link InvalidPathException#getInput()}.
     */
    @Override
    public String getName() {
        return checkFileName(fileName);
    }
463
464    /**
465     * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
466     *
467     * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
468     */
469    @Override
470    public OutputStream getOutputStream() {
471        if (dos == null) {
472            final Supplier<Path> pathSupplier =
473                    () -> this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId()));
474            try {
475                final Listener persistenceListener = new Listener() {
476                    @Override
477                    public void persisted(final Path pPath) {
478                        Listener.super.persisted(pPath);
479                        final FileCleaningTracker fct = getFileCleaningTracker();
480                        if (fct != null) {
481                            fct.track(getPath(), this);
482                        }
483                    }
484                };
485                dos = new DeferrableOutputStream(threshold, pathSupplier, persistenceListener);
486            } catch (final IOException ioe) {
487                throw new UncheckedIOException(ioe);
488            }
489        }
490        return dos;
491    }
492
493    /**
494     * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in
495     * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to a
496     * new location without copying the data, if the source and destination locations reside within the same logical volume.
497     *
498     * @return The data file, or {@code null} if the data is stored in memory.
499     */
500    public Path getPath() {
501        return dos == null ? null : dos.getPath();
502    }
503
504    /**
505     * Returns the contents of the file as a {@link Reader}, using the specified
506     * {@link #getCharset()}. If the contents are not yet available, returns null.
507     * This is the case, for example, if the underlying output stream has not yet
508     * been closed.
509     * @return The contents of the file as a {@link Reader}
510     * @throws UnsupportedEncodingException The character set, which is
511     *   specified in the files "content-type" header, is invalid.
512     * @throws IOException An I/O error occurred, while the
513     *   underlying {@link #getInputStream() input stream} was created.
514     */
515    public Reader getReader() throws IOException, UnsupportedEncodingException {
516        final InputStream is = getInputStream();
517        final var parser = new ParameterParser();
518        parser.setLowerCaseNames(true);
519        // Parameter parser can handle null input
520        final var params = parser.parse(getContentType(), ';');
521        final Charset cs = Charsets.toCharset(params.get("charset"), charsetDefault);
522        return new InputStreamReader(is, cs);
523    }
524
525    /**
526     * Gets the size of the file.
527     *
528     * @return The size of the file, in bytes.
529     */
530    @Override
531    public long getSize() {
532        return dos == null ? 0L : dos.getSize();
533    }
534
535    /**
536     * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file.
537     *
538     * @return The contents of the file, as a string, if available, or null.
539     * @throws IOException if an I/O error occurs
540     * @throws OutOfMemoryError See {@link Files#readAllBytes(Path)}: If a string of the required size cannot be allocated,
541     *   for example the file is larger than {@code 2GB}. If so, you should use {@link #getReader()}.
542     * @throws UnsupportedEncodingException The character set, which is
543     *   specified in the files "content-type" header, is invalid.
544     * @deprecated Since 2.0.0, use {@link #getReader()} instead.
545     */
546    @Override
547    public String getString() throws IOException, UnsupportedEncodingException, OutOfMemoryError {
548        final byte[] bytes = get();
549        return bytes == null ? null : new String(bytes, getCharset());
550    }
551
552    /**
553     * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file.
554     *
555     * @param charset The charset to use.
556     * @return The contents of the file, as a string.
557     * @throws IOException if an I/O error occurs
558     */
559    @Override
560    public String getString(final Charset charset) throws IOException {
561        return new String(get(), Charsets.toCharset(charset, charsetDefault));
562    }
563
    /**
     * Returns the file items threshold, as taken from the builder. The threshold is handed over to the output stream, which is created by
     * {@link #getOutputStream()}.
     * @return The threshold.
     */
    public int getThreshold() {
        return threshold;
    }
571
    /**
     * Tests whether or not a {@code FileItem} instance represents a simple form field. The value is taken from the builder, and can be replaced
     * through {@link #setFormField(boolean)}.
     *
     * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
     * @see #setFormField(boolean)
     */
    @Override
    public boolean isFormField() {
        return isFormField;
    }
582
583    /**
584     * Provides a hint as to whether or not the file contents will be read from memory.
585     *
586     * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
587     */
588    @Override
589    public boolean isInMemory() {
590        return dos == null || dos.isInMemory();
591    }
592
    /**
     * Sets the default charset for use when no explicit charset parameter is provided by the sender. The given value is stored as it is, without
     * any validation.
     *
     * @param charset the default charset
     * @return {@code this} instance.
     * @see #getCharsetDefault()
     */
    public DiskFileItem setCharsetDefault(final Charset charset) {
        charsetDefault = charset;
        return this;
    }
603
    /**
     * Sets the field name used to reference this file item.
     *
     * @param fieldName The name of the form field.
     * @return {@code this} instance.
     * @see #getFieldName()
     */
    @Override
    public DiskFileItem setFieldName(final String fieldName) {
        this.fieldName = fieldName;
        return this;
    }
615
    /**
     * Sets the {@link FileCleaningTracker}, which is being used to remove
     * temporary files. The tracker is looked up at the time, when the data of this
     * item is persisted to a file.
     * @param fileCleaningTracker The {@link FileCleaningTracker}, which is being used to
     * remove temporary files.
     * @see #getFileCleaningTracker()
     */
    public void setFileCleaningTracker(final FileCleaningTracker fileCleaningTracker) {
        this.fileCleaningTracker = fileCleaningTracker;
    }
625
    /**
     * Specifies whether or not a {@code FileItem} instance represents a simple form field.
     *
     * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
     * @return {@code this} instance.
     * @see #isFormField()
     */
    @Override
    public DiskFileItem setFormField(final boolean state) {
        isFormField = state;
        return this;
    }
637
    /**
     * Sets the file item headers.
     *
     * @param headers The file items headers.
     * @return {@code this} instance.
     * @see #getHeaders()
     */
    @Override
    public DiskFileItem setHeaders(final FileItemHeaders headers) {
        this.fileItemHeaders = headers;
        return this;
    }
648
649    /**
650     * Returns a string representation of this object.
651     *
652     * @return a string representation of this object.
653     */
654    @Override
655    public String toString() {
656        return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(),
657                getFieldName());
658    }
659
660    /**
661     * Writes an uploaded item to disk.
662     * <p>
663     * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the
664     * uploaded item to a file.
665     * </p>
666     * <p>
667     * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise,
668     * the data will be copied to the specified file.
669     * </p>
670     * <p>
671     * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method
672     * renames a temporary file, that file will no longer be available to copy or rename again at a later time.
673     * </p>
674     *
675     * @param file The {@code File} into which the uploaded item should be stored.
676     * @throws IOException if an error occurs.
677     */
678    @Override
679    public DiskFileItem write(final Path file) throws IOException {
680        if (isInMemory()) {
681            try (var fout = Files.newOutputStream(file)) {
682                fout.write(get());
683            } catch (final IOException e) {
684                throw new IOException("Unexpected output data", e);
685            }
686        } else {
687            final var outputFile = getPath();
688            if (outputFile == null) {
689                /*
690                 * For whatever reason we cannot write the file to disk.
691                 */
692                throw new FileUploadException("Cannot write uploaded file to disk.");
693            }
694            //
695            // The uploaded file is being stored on disk in a temporary location so move it to the desired file.
696            //
697            Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING);
698        }
699        return this;
700    }
701}