1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
use ref_cast::RefCast;

use crate::http::RawStr;

/// A file name in a [`TempFile`] or multipart [`DataField`].
///
/// A `Content-Disposition` header, either in a response or a multipart field,
/// can optionally specify a `filename` directive as identifying information for
/// the attached file. This type represents the value of that directive.
///
/// # Safety
///
/// There are no restrictions on the value of the directive. In particular, the
/// value can be wholly unsafe to use as a file name in common contexts. As
/// such, Rocket sanitizes the value into a version that _is_ safe to use as a
/// file name in common contexts; this sanitized version can be retrieved via
/// [`FileName::as_str()`] and is returned by [`TempFile::name()`]. Note that
/// sanitization can fail: if no safe substring of the raw value exists,
/// [`FileName::as_str()`] returns `None`.
///
/// You will likely want to prepend or append random or user-specific components
/// to the name to avoid collisions; UUIDs make for good "random" data. You
/// may also prefer to avoid the value in the directive entirely by using a
/// safe, application-generated name instead.
///
/// [`TempFile::name()`]: crate::fs::TempFile::name
/// [`DataField`]: crate::form::DataField
/// [`TempFile`]: crate::fs::TempFile
// `FileName` is a transparent wrapper over `str` so that `FileName::new()` can
// reinterpret a `&str` as a `&FileName` through the derived `RefCast` without
// copying. NOTE(review): `ref_cast` requires `#[repr(transparent)]` for this
// derive; do not remove the attribute or add further fields.
#[repr(transparent)]
#[derive(RefCast, Debug)]
pub struct FileName(str);

impl FileName {
    /// Wraps a string as a `FileName`. This is cost-free.
    ///
    /// No validation or sanitization is performed here; the string is only
    /// reinterpreted as a `FileName`. Sanitization happens lazily, each time
    /// [`FileName::as_str()`] is called.
    ///
    /// # Example
    ///
    /// ```rust
    /// use rocket::fs::FileName;
    ///
    /// let name = FileName::new("some-file.txt");
    /// assert_eq!(name.as_str(), Some("some-file"));
    /// assert_eq!(name.dangerous_unsafe_unsanitized_raw(), "some-file.txt");
    /// ```
    pub fn new<S: AsRef<str> + ?Sized>(string: &S) -> &FileName {
        FileName::ref_cast(string.as_ref())
    }

    /// The sanitized file name, stripped of any file extension and special
    /// characters, safe for use as a file name.
    ///
    /// # Sanitization
    ///
    /// A "sanitized" file name is a non-empty string, stripped of its file
    /// extension, which is not a platform-specific reserved name and does not
    /// contain any platform-specific special characters.
    ///
    /// On Unix, these are the characters `'.', '/', '\\', '<', '>', '|', ':',
    /// '(', ')', '&', ';', '#', '?', '*'`.
    ///
    /// On Windows (and non-Unix OSs), these are the characters `'.', '<', '>',
    /// ':', '"', '/', '\\', '|', '?', '*', ',', ';', '=', '(', ')', '&', '#'`,
    /// and the reserved names `"CON", "PRN", "AUX", "NUL", "COM1", "COM2",
    /// "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2",
    /// "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"`. Reserved names
    /// are matched without regard to ASCII case, so `"con"` and `"Nul"` are
    /// rejected just like `"CON"` and `"NUL"`.
    ///
    /// Additionally, all control characters are considered "special".
    ///
    /// An attempt is made to transform the raw file name into a sanitized
    /// version by identifying a valid substring of the raw file name that meets
    /// this criteria. If none is found, `None` is returned.
    ///
    /// Concretely, the final path component of the raw name is split on every
    /// special character and the first non-empty piece is selected. `None` is
    /// returned if the raw name has no final path component, if every piece is
    /// empty, or if the selected piece is a reserved name.
    ///
    /// # Example
    ///
    /// ```rust
    /// use rocket::fs::FileName;
    ///
    /// let name = FileName::new("some-file.txt");
    /// assert_eq!(name.as_str(), Some("some-file"));
    ///
    /// let name = FileName::new("some-file.txt.zip");
    /// assert_eq!(name.as_str(), Some("some-file"));
    ///
    /// let name = FileName::new("../../../../etc/shadow");
    /// assert_eq!(name.as_str(), Some("shadow"));
    ///
    /// let name = FileName::new("/etc/.shadow");
    /// assert_eq!(name.as_str(), Some("shadow"));
    ///
    /// let name = FileName::new("/a/b/some/file.txt.zip");
    /// assert_eq!(name.as_str(), Some("file"));
    ///
    /// let name = FileName::new("/a/b/some/.file.txt.zip");
    /// assert_eq!(name.as_str(), Some("file"));
    ///
    /// let name = FileName::new("/a/b/some/.*file.txt.zip");
    /// assert_eq!(name.as_str(), Some("file"));
    ///
    /// let name = FileName::new("a/\\b/some/.*file<.txt.zip");
    /// assert_eq!(name.as_str(), Some("file"));
    ///
    /// let name = FileName::new(">>>.foo.txt");
    /// assert_eq!(name.as_str(), Some("foo"));
    ///
    /// let name = FileName::new("b:c");
    /// #[cfg(unix)] assert_eq!(name.as_str(), Some("b"));
    /// #[cfg(not(unix))] assert_eq!(name.as_str(), Some("c"));
    ///
    /// let name = FileName::new("nul.txt");
    /// #[cfg(unix)] assert_eq!(name.as_str(), Some("nul"));
    /// #[cfg(not(unix))] assert_eq!(name.as_str(), None);
    ///
    /// let name = FileName::new("//./.<>");
    /// assert_eq!(name.as_str(), None);
    /// ```
    pub fn as_str(&self) -> Option<&str> {
        #[cfg(not(unix))]
        let (bad_char, bad_name) = {
            static BAD_CHARS: &[char] = &[
                // Microsoft says these are invalid.
                '.', '<', '>', ':', '"', '/', '\\', '|', '?', '*',

                // `cmd.exe` treats these specially.
                ',', ';', '=',

                // These are treated specially by unix-like shells.
                '(', ')', '&', '#',
            ];

            // Microsoft says these are reserved.
            static BAD_NAMES: &[&str] = &[
                "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4",
                "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2",
                "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
            ];

            let bad_char = |c: char| BAD_CHARS.contains(&c) || c.is_control();

            // Device names are resolved case-insensitively, so `con` is every
            // bit as reserved as `CON`; an exact match would let it through.
            let bad_name = |n: &str| BAD_NAMES.iter().any(|bad| bad.eq_ignore_ascii_case(n));
            (bad_char, bad_name)
        };

        #[cfg(unix)]
        let (bad_char, bad_name) = {
            static BAD_CHARS: &[char] = &[
                // These have special meaning in a file name.
                '.', '/', '\\',

                // These are treated specially by shells.
                '<', '>', '|', ':', '(', ')', '&', ';', '#', '?', '*',
            ];

            let bad_char = |c: char| BAD_CHARS.contains(&c) || c.is_control();

            // There are no reserved file names on Unix.
            let bad_name = |_: &str| false;
            (bad_char, bad_name)
        };

        // Take the final path component, then the first non-empty run of
        // characters between special characters. Since `.` is special, this
        // also drops any extension(s) and any leading dots.
        let file_name = std::path::Path::new(&self.0)
            .file_name()
            .and_then(|n| n.to_str())
            .and_then(|n| n.split(bad_char).find(|s| !s.is_empty()))?;

        // At this point, `file_name` is non-empty (guaranteed by `.find()`)
        // and free of special characters (guaranteed by `.split()`), but it
        // may still be a reserved name.
        if bad_name(file_name) {
            return None;
        }

        Some(file_name)
    }

    /// Returns `true` if the _complete_ raw file name is safe.
    ///
    /// Note that `.as_str()` returns a safe _subset_ of the raw file name, if
    /// there is one. If this method returns `true`, then that subset is the
    /// complete raw file name.
    ///
    /// This method should be used sparingly. In particular, there is no
    /// advantage to calling `is_safe()` prior to calling `as_str()`; simply
    /// call `as_str()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use rocket::fs::FileName;
    ///
    /// let name = FileName::new("some-file.txt");
    /// assert_eq!(name.as_str(), Some("some-file"));
    /// assert!(!name.is_safe());
    ///
    /// let name = FileName::new("some-file");
    /// assert_eq!(name.as_str(), Some("some-file"));
    /// assert!(name.is_safe());
    /// ```
    pub fn is_safe(&self) -> bool {
        // Safe only if sanitization succeeds _and_ changes nothing.
        self.as_str() == Some(&self.0)
    }

    /// The raw, unsanitized, potentially unsafe file name. Prefer to use
    /// [`FileName::as_str()`], always.
    ///
    /// # ⚠️ DANGER ⚠️
    ///
    /// This method returns the file name exactly as it was specified by the
    /// client. You should **_not_** use this name _unless_ you require the
    /// originally specified `filename` _and_ it is known not to contain
    /// special, potentially dangerous characters, _and_:
    ///
    ///   1. All clients are known to be trusted, perhaps because the server
    ///      only runs locally, serving known, local requests, or...
    ///
    ///   2. You will not use the file name to store a file on disk or any
    ///      context that expects a file name _and_ you will not use the
    ///      extension to determine how to handle/parse the data, or...
    ///
    ///   3. You will expertly process the raw name into a sanitized version for
    ///      use in specific contexts.
    ///
    /// If these conditions are not met, use [`FileName::as_str()`].
    ///
    /// # Example
    ///
    /// ```rust
    /// use rocket::fs::FileName;
    ///
    /// let name = FileName::new("some-file.txt");
    /// assert_eq!(name.dangerous_unsafe_unsanitized_raw(), "some-file.txt");
    ///
    /// let name = FileName::new("../../../../etc/shadow");
    /// assert_eq!(name.dangerous_unsafe_unsanitized_raw(), "../../../../etc/shadow");
    ///
    /// let name = FileName::new("../../.ssh/id_rsa");
    /// assert_eq!(name.dangerous_unsafe_unsanitized_raw(), "../../.ssh/id_rsa");
    ///
    /// let name = FileName::new("/Rocket.toml");
    /// assert_eq!(name.dangerous_unsafe_unsanitized_raw(), "/Rocket.toml");
    /// ```
    pub fn dangerous_unsafe_unsanitized_raw(&self) -> &RawStr {
        self.0.into()
    }
}

/// Borrows any string-like value as a `FileName` without copying.
///
/// This is the trait-based spelling of `FileName::new()`; the string is only
/// wrapped, never sanitized, by the conversion itself.
impl<'a, S> From<&'a S> for &'a FileName
where
    S: AsRef<str> + ?Sized,
{
    #[inline]
    fn from(raw: &'a S) -> Self {
        FileName::new(raw)
    }
}