rocket/data/
transform.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
use std::io;
use std::ops::{Deref, DerefMut};
use std::pin::Pin;
use std::task::{Poll, Context};

use tokio::io::ReadBuf;

/// Chainable, in-place, streaming data transformer.
///
/// [`Transform`] operates on [`TransformBuf`]s similar to how [`AsyncRead`]
/// operates on [`ReadBuf`]. A [`Transform`] sits somewhere in a chain of
/// transforming readers. The head (most upstream part) of the chain is _always_
/// an [`AsyncRead`]: the data source. The tail (all downstream parts) is
/// composed _only_ of other [`Transform`]s:
///
/// ```text
///                          downstream --->
///  AsyncRead | Transform | .. | Transform
/// <---- upstream
/// ```
///
/// When the upstream source makes data available, the
/// [`Transform::transform()`] method is called. [`Transform`]s may obtain the
/// subset of the filled section added by an upstream data source with
/// [`TransformBuf::fresh()`]. They may modify this data at will, potentially
/// changing the size of the filled section. For example,
/// [`TransformBuf::spoil()`] "removes" all of the fresh data, and
/// [`TransformBuf::fresh_mut()`] can be used to modify the data in-place.
///
/// Additionally, new data may be added in-place via the traditional approach:
/// write to (or overwrite) the initialized section of the buffer and mark it as
/// filled. All of the remaining filled data will be passed to downstream
/// transforms as "fresh" data. To add data to the end of the (potentially
/// rewritten) stream, the [`Transform::poll_finish()`] method can be
/// implemented.
///
/// [`AsyncRead`]: tokio::io::AsyncRead
pub trait Transform {
    /// Called when data is read from the upstream source. For any given fresh
    /// data, this method is called only once. [`TransformBuf::fresh()`] is
    /// guaranteed to contain at least one byte.
    ///
    /// While this method is not _async_ (it does not return [`Poll`]), it is
    /// nevertheless executed in an async context and should respect all such
    /// restrictions including not blocking.
    fn transform(
        self: Pin<&mut Self>,
        buf: &mut TransformBuf<'_, '_>,
    ) -> io::Result<()>;

    /// Called when the upstream is finished, that is, it has no more data to
    /// fill. At this point, the transform becomes an async reader. This method
    /// thus has identical semantics to [`AsyncRead::poll_read()`]. This method
    /// may never be called if the upstream does not finish.
    ///
    /// The default implementation returns `Poll::Ready(Ok(()))`.
    ///
    /// [`AsyncRead::poll_read()`]: tokio::io::AsyncRead::poll_read()
    fn poll_finish(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        // Bind the unused parameters to silence warnings: the default
        // implementation appends no data and completes immediately.
        let (_, _) = (cx, buf);
        Poll::Ready(Ok(()))
    }
}

/// A buffer of transformable streaming data.
///
/// # Overview
///
/// A byte buffer, similar to a [`ReadBuf`], with a "fresh" dimension. Fresh
/// data is always a subset of the filled data, filled data is always a subset
/// of initialized data, and initialized data is always a subset of the buffer
/// itself. Both the filled and initialized data sections are guaranteed to be
/// at the start of the buffer, but the fresh subset is likely to begin
/// somewhere inside the filled section.
///
/// To visualize this, the diagram below represents a possible state for the
/// byte buffer being tracked. The square `[ ]` brackets represent the complete
/// buffer, while the curly `{ }` represent the named subset.
///
/// ```text
/// [  { !! fresh !! }                                 ]
/// { +++ filled +++ }          unfilled               ]
/// { ----- initialized ------ }     uninitialized     ]
/// [                    capacity                      ]
/// ```
///
/// The same buffer represented in its true single dimension is below:
///
/// ```text
/// [ ++!!!!!!!!!!!!!!---------xxxxxxxxxxxxxxxxxxxxxxxx]
/// ```
///
/// * `+`: filled (implies initialized)
/// * `!`: fresh (implies filled)
/// * `-`: unfilled / initialized (implies initialized)
/// * `x`: uninitialized (implies unfilled)
///
/// As with [`ReadBuf`], [`AsyncRead`] readers fill the initialized portion of a
/// [`TransformBuf`] to indicate that data is available. _Filling_ initialized
/// portions of the byte buffers is what increases the size of the _filled_
/// section. Because a [`ReadBuf`] may already be partially filled when a reader
/// adds bytes to it, a mechanism to track where the _newly_ filled portion
/// exists is needed. This is exactly what the "fresh" section tracks.
///
/// [`AsyncRead`]: tokio::io::AsyncRead
pub struct TransformBuf<'a, 'b> {
    // The underlying byte buffer; all of `ReadBuf`'s methods are reachable
    // on a `TransformBuf` through the `Deref`/`DerefMut` impls below.
    pub(crate) buf: &'a mut ReadBuf<'b>,
    // Offset into the filled section where the "fresh" data begins, i.e. the
    // filled length before the upstream source added new data.
    pub(crate) cursor: usize,
}

impl TransformBuf<'_, '_> {
    /// Returns a borrow to the fresh data: data filled by the upstream source.
    pub fn fresh(&self) -> &[u8] {
        // Everything filled at or past the cursor is fresh.
        let (_, fresh) = self.filled().split_at(self.cursor);
        fresh
    }

    /// Returns a mutable borrow to the fresh data: data filled by the upstream
    /// source.
    pub fn fresh_mut(&mut self) -> &mut [u8] {
        // Read the cursor first so the mutable borrow below is unencumbered.
        let start = self.cursor;
        let filled = self.filled_mut();
        &mut filled[start..]
    }

    /// Spoils the fresh data by resetting the filled section to its value
    /// before any new data was added. As a result, the data will never be seen
    /// by any downstream consumer unless it is returned via another mechanism.
    pub fn spoil(&mut self) {
        let fresh_start = self.cursor;
        self.set_filled(fresh_start);
    }
}

pub struct Inspect(pub(crate) Box<dyn FnMut(&[u8]) + Send + Sync + 'static>);

impl Transform for Inspect {
    /// Hands the fresh bytes to the inspection callback without modifying the
    /// buffer, so all data continues to flow downstream untouched.
    fn transform(self: Pin<&mut Self>, buf: &mut TransformBuf<'_, '_>) -> io::Result<()> {
        // `Inspect`'s only field is a `Box`, so it is `Unpin` and the pin can
        // be safely unwrapped.
        let callback = &mut self.get_mut().0;
        callback(buf.fresh());
        Ok(())
    }
}

pub struct InPlaceMap(
    pub(crate) Box<dyn FnMut(&mut TransformBuf<'_, '_>) -> io::Result<()> + Send + Sync + 'static>
);

impl Transform for InPlaceMap {
    /// Delegates directly to the stored closure, which may mutate the buffer
    /// (and thereby the stream) in place.
    fn transform(self: Pin<&mut Self>, buf: &mut TransformBuf<'_, '_>) -> io::Result<()> {
        // Boxed closures are `Unpin`, so unwrapping the pin is safe.
        let mapper = &mut self.get_mut().0;
        mapper(buf)
    }
}

impl<'a, 'b> Deref for TransformBuf<'a, 'b> {
    type Target = ReadBuf<'b>;

    /// Exposes every read-only `ReadBuf` method directly on a `TransformBuf`.
    fn deref(&self) -> &Self::Target {
        &*self.buf
    }
}

impl<'a, 'b> DerefMut for TransformBuf<'a, 'b> {
    /// Exposes every mutating `ReadBuf` method directly on a `TransformBuf`.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut *self.buf
    }
}

// TODO: Test chaining various transform combinations:
//  * consume | consume
//  * add | consume
//  * consume | add
//  * add | add
// Where `add` is a transformer that adds data to the stream, and `consume` is
// one that removes data.
#[cfg(test)]
#[allow(deprecated)]
mod tests {
    use std::hash::SipHasher;
    use std::sync::{Arc, atomic::{AtomicU8, AtomicU64, Ordering}};

    use parking_lot::Mutex;
    use ubyte::ToByteUnit;

    use crate::http::Method;
    use crate::local::blocking::Client;
    use crate::fairing::AdHoc;
    use crate::{route, Route, Data, Response, Request};

    mod hash_transform {
        use std::io::Cursor;
        use std::hash::Hasher;

        use tokio::io::AsyncRead;

        use super::super::*;

        /// A consuming transform: hashes all upstream data while spoiling it
        /// (so no raw data flows downstream), then emits the 8 big-endian
        /// hash bytes once the upstream finishes.
        pub struct HashTransform<H: Hasher> {
            // Incrementally updated with each chunk of fresh data.
            pub(crate) hasher: H,
            // `None` until the upstream finishes; afterwards holds a cursor
            // over the finalized hash bytes being streamed downstream.
            pub(crate) hash: Option<Cursor<[u8; 8]>>
        }

        impl<H: Hasher + Unpin> Transform for HashTransform<H> {
            fn transform(
                mut self: Pin<&mut Self>,
                buf: &mut TransformBuf<'_, '_>,
            ) -> io::Result<()> {
                // Fold the fresh bytes into the hash, then spoil them so no
                // downstream consumer ever sees the raw data.
                self.hasher.write(buf.fresh());
                buf.spoil();
                Ok(())
            }

            fn poll_finish(
                mut self: Pin<&mut Self>,
                cx: &mut Context<'_>,
                buf: &mut ReadBuf<'_>,
            ) -> Poll<io::Result<()>> {
                // Finalize the hash exactly once, on the first finish poll.
                if self.hash.is_none() {
                    let hash = self.hasher.finish();
                    self.hash = Some(Cursor::new(hash.to_be_bytes()));
                }

                // Stream the 8 hash bytes out like an ordinary reader; the
                // cursor tracks how much has already been read.
                let cursor = self.hash.as_mut().unwrap();
                Pin::new(cursor).poll_read(cx, buf)
            }
        }

        impl crate::Data<'_> {
            /// Chain an in-place hash [`Transform`] to `self`.
            pub fn chain_hash_transform<H: std::hash::Hasher>(&mut self, hasher: H) -> &mut Self
                where H: Unpin + Send + Sync + 'static
            {
                self.chain_transform(HashTransform { hasher, hash: None })
            }
        }
    }

    #[test]
    fn test_transform_series() {
        // Handler that fully reads (and discards) the request body — reading
        // the body is what actually drives the transform chain.
        fn handler<'r>(_: &'r Request<'_>, data: Data<'r>) -> route::BoxFuture<'r> {
            Box::pin(async move {
                data.open(128.bytes()).stream_to(tokio::io::sink()).await.expect("read ok");
                route::Outcome::Success(Response::new())
            })
        }

        // Shared state observed by the transforms: a call counter for the
        // final inspect, the raw upstream bytes, and the computed hash.
        let inspect2: Arc<AtomicU8> = Arc::new(AtomicU8::new(0));
        let raw_data: Arc<Mutex<Vec<u8>>> = Arc::new(Mutex::new(Vec::new()));
        let hash: Arc<AtomicU64> = Arc::new(AtomicU64::new(0));
        let rocket = crate::build()
            .manage(hash.clone())
            .manage(raw_data.clone())
            .manage(inspect2.clone())
            .mount("/", vec![Route::new(Method::Post, "/", handler)])
            .attach(AdHoc::on_request("transforms", |req, data| Box::pin(async {
                // `hash1` and `hash2` clone the *same* managed atomic: one
                // inspect stores the hash into it, a later one reads it back.
                let hash1 = req.rocket().state::<Arc<AtomicU64>>().cloned().unwrap();
                let hash2 = req.rocket().state::<Arc<AtomicU64>>().cloned().unwrap();
                let raw_data = req.rocket().state::<Arc<Mutex<Vec<u8>>>>().cloned().unwrap();
                let inspect2 = req.rocket().state::<Arc<AtomicU8>>().cloned().unwrap();
                // Chain: capture raw bytes, consume them into a hash, then
                // inspect the 8 resulting hash bytes twice downstream.
                data.chain_inspect(move |bytes| { *raw_data.lock() = bytes.to_vec(); })
                    .chain_hash_transform(SipHasher::new())
                    .chain_inspect(move |bytes| {
                        assert_eq!(bytes.len(), 8);
                        let bytes: [u8; 8] = bytes.try_into().expect("[u8; 8]");
                        let value = u64::from_be_bytes(bytes);
                        hash1.store(value, Ordering::Release);
                    })
                    .chain_inspect(move |bytes| {
                        assert_eq!(bytes.len(), 8);
                        let bytes: [u8; 8] = bytes.try_into().expect("[u8; 8]");
                        let value = u64::from_be_bytes(bytes);
                        // The preceding inspect already stored the same hash,
                        // so both views of the stream must agree.
                        let prev = hash2.load(Ordering::Acquire);
                        assert_eq!(prev, value);
                        inspect2.fetch_add(1, Ordering::Release);
                    });
            })));

        // Make sure nothing has happened yet.
        assert!(raw_data.lock().is_empty());
        assert_eq!(hash.load(Ordering::Acquire), 0);
        assert_eq!(inspect2.load(Ordering::Acquire), 0);

        // Check that nothing happens if the data isn't read.
        // (Only a POST route is mounted, so this GET never reaches the
        // handler and the body is never read.)
        let client = Client::debug(rocket).unwrap();
        client.get("/").body("Hello, world!").dispatch();
        assert!(raw_data.lock().is_empty());
        assert_eq!(hash.load(Ordering::Acquire), 0);
        assert_eq!(inspect2.load(Ordering::Acquire), 0);

        // Check inspect + hash + inspect + inspect.
        client.post("/").body("Hello, world!").dispatch();
        assert_eq!(raw_data.lock().as_slice(), "Hello, world!".as_bytes());
        assert_eq!(hash.load(Ordering::Acquire), 0xae5020d7cf49d14f);
        assert_eq!(inspect2.load(Ordering::Acquire), 1);

        // Check inspect + hash + inspect + inspect, round 2.
        let string = "Rocket, Rocket, where art thee? Oh, tis in the sky, I see!";
        client.post("/").body(string).dispatch();
        assert_eq!(raw_data.lock().as_slice(), string.as_bytes());
        assert_eq!(hash.load(Ordering::Acquire), 0x323f9aa98f907faf);
        assert_eq!(inspect2.load(Ordering::Acquire), 2);
    }
}