1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// SPDX-License-Identifier: GPL-2.0

//! Async networking.

use crate::{bindings, error::code::*, net, sync::NoWaitLock, types::Opaque, Result};
use core::{
    future::Future,
    marker::{PhantomData, PhantomPinned},
    ops::Deref,
    pin::Pin,
    task::{Context, Poll, Waker},
};

/// A socket listening on a TCP port.
///
/// This wraps a [`net::TcpListener`] (which it also dereferences to) and adds an async-friendly
/// way of accepting connections.
///
/// The [`TcpListener::accept`] method is meant to be used in async contexts.
pub struct TcpListener {
    /// The wrapped listener; `accept` forwards to it and its socket is the one async waiters
    /// register on.
    listener: net::TcpListener,
}

impl TcpListener {
    /// Creates a new TCP listener.
    ///
    /// The listener is set up by [`net::TcpListener::try_new`] for the socket address `addr` in
    /// the namespace `ns`; any error it reports is returned unchanged.
    pub fn try_new(ns: &net::Namespace, addr: &net::SocketAddr) -> Result<Self> {
        let listener = net::TcpListener::try_new(ns, addr)?;
        Ok(Self { listener })
    }

    /// Accepts a new connection.
    ///
    /// Returns a future that, once ready, carries the outcome of the accept operation: the
    /// newly-accepted [`TcpStream`] on success, or the error reported by the underlying
    /// listener.
    pub fn accept(&self) -> impl Future<Output = Result<TcpStream>> + '_ {
        // Notifications that should cause the accept to be retried.
        let events = bindings::BINDINGS_EPOLLIN | bindings::BINDINGS_EPOLLERR;

        SocketFuture::from_listener(self, events, || {
            let stream = self.listener.accept(false)?;
            Ok(TcpStream { stream })
        })
    }
}

impl Deref for TcpListener {
    type Target = net::TcpListener;

    /// Gives access to the wrapped [`net::TcpListener`].
    fn deref(&self) -> &Self::Target {
        let Self { listener } = self;
        listener
    }
}

/// A connected TCP socket.
///
/// This wraps a [`net::TcpStream`] (which it also dereferences to) and adds future-returning
/// variants of the I/O methods.
///
/// The potentially blocking methods (e.g., [`TcpStream::read`], [`TcpStream::write`]) are meant
/// to be used in async contexts.
///
/// # Examples
///
/// ```
/// # use kernel::prelude::*;
/// # use kernel::kasync::net::TcpStream;
/// async fn echo_server(stream: TcpStream) -> Result {
///     let mut buf = [0u8; 1024];
///     loop {
///         let n = stream.read(&mut buf).await?;
///         if n == 0 {
///             return Ok(());
///         }
///         stream.write_all(&buf[..n]).await?;
///     }
/// }
/// ```
pub struct TcpStream {
    /// The wrapped stream; reads and writes forward to it and its socket is the one async
    /// waiters register on.
    stream: net::TcpStream,
}

impl TcpStream {
    /// Reads data from a connected socket.
    ///
    /// Returns a future that, once ready, carries the outcome of the read operation; on success
    /// that is the number of bytes stored in `buf`, which will be zero if the connection is
    /// closed.
    pub fn read<'a>(&'a self, buf: &'a mut [u8]) -> impl Future<Output = Result<usize>> + 'a {
        // Notifications that should cause the read to be retried.
        let events =
            bindings::BINDINGS_EPOLLIN | bindings::BINDINGS_EPOLLHUP | bindings::BINDINGS_EPOLLERR;

        SocketFuture::from_stream(self, events, || self.stream.read(buf, false))
    }

    /// Writes data to the connected socket.
    ///
    /// Returns a future that, once ready, carries the outcome of the write operation; on success
    /// that is the number of bytes taken from `buf`.
    pub fn write<'a>(&'a self, buf: &'a [u8]) -> impl Future<Output = Result<usize>> + 'a {
        // Notifications that should cause the write to be retried.
        let events =
            bindings::BINDINGS_EPOLLOUT | bindings::BINDINGS_EPOLLHUP | bindings::BINDINGS_EPOLLERR;

        SocketFuture::from_stream(self, events, || self.stream.write(buf, false))
    }

    /// Writes all the data to the connected socket.
    ///
    /// Repeatedly awaits [`TcpStream::write`] on the part of `buf` that has not been written
    /// yet. Completes with `Ok(())` once nothing is left, or with the first error a write
    /// reports.
    pub async fn write_all<'a>(&'a self, buf: &'a [u8]) -> Result {
        let mut pending = buf;

        loop {
            if pending.is_empty() {
                return Ok(());
            }

            let sent = self.write(pending).await?;
            pending = &pending[sent..];
        }
    }
}

impl Deref for TcpStream {
    type Target = net::TcpStream;

    /// Gives access to the wrapped [`net::TcpStream`].
    fn deref(&self) -> &Self::Target {
        let Self { stream } = self;
        stream
    }
}

/// A future for a socket operation.
///
/// Each poll attempts `operation`; while it keeps failing with `EAGAIN` the future stays pending
/// and relies on an entry in the socket's wait queue to wake the task up again.
///
/// # Invariants
///
/// `sock` is always non-null and valid for the duration of the lifetime of the instance.
struct SocketFuture<'a, Out, F: FnMut() -> Result<Out> + Send + 'a> {
    /// The socket whose wait queue (`wq.wait`) this future registers on.
    sock: *mut bindings::socket,
    /// The event bits of interest: `wake_callback` ignores notifications whose key has none of
    /// these bits set.
    mask: u32,
    /// Whether `wq_entry` has been initialised and added to the socket's wait queue; checked on
    /// drop to decide if it has to be removed.
    is_queued: bool,
    /// The wait queue entry handed to the socket's wait queue; `wake_callback` recovers the
    /// future from a pointer to this field.
    wq_entry: Opaque<bindings::wait_queue_entry>,
    /// The waker from the most recent pending poll. It is accessed by both `set_waker` and
    /// `wake_callback`, always through `try_lock`.
    waker: NoWaitLock<Option<Waker>>,
    /// Ties the instance to the lifetime `'a` during which the socket must remain valid.
    _p: PhantomData<&'a ()>,
    /// Makes the type `!Unpin`, as a pointer to `wq_entry` is given to the wait queue.
    _pin: PhantomPinned,
    /// The socket operation to attempt; `poll_once` calls it every time.
    operation: F,
}

// SAFETY: A kernel socket can be used from any thread, `wq_entry` is only used on drop and when
// `is_queued` is initially `false`. The remaining non-trivial field, `operation`, is `Send`
// because of the `F: Send` bound.
unsafe impl<Out, F: FnMut() -> Result<Out> + Send> Send for SocketFuture<'_, Out, F> {}

impl<'a, Out, F: FnMut() -> Result<Out> + Send + 'a> SocketFuture<'a, Out, F> {
    /// Builds a socket future that is not yet registered on any wait queue.
    ///
    /// `mask` selects the notifications that wake the task up, and `operation` is the socket
    /// operation attempted on every poll.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `sock` is non-null, valid, and remains valid for the lifetime
    /// (`'a`) of the returned instance.
    unsafe fn new(sock: *mut bindings::socket, mask: u32, operation: F) -> Self {
        // INVARIANT: The safety requirements of this function guarantee that `sock` is non-null
        // and valid for as long as the instance exists.
        Self {
            sock,
            mask,
            operation,
            waker: NoWaitLock::new(None),
            wq_entry: Opaque::uninit(),
            is_queued: false,
            _pin: PhantomPinned,
            _p: PhantomData,
        }
    }

    /// Builds a socket future that waits on the socket of a tcp listener.
    fn from_listener(listener: &'a TcpListener, mask: u32, operation: F) -> Self {
        let sock = listener.listener.sock;

        // SAFETY: The future cannot outlive the borrow of the listener, and the listener keeps
        // its socket valid for as long as it exists.
        unsafe { Self::new(sock, mask, operation) }
    }

    /// Builds a socket future that waits on the socket of a tcp stream.
    fn from_stream(stream: &'a TcpStream, mask: u32, operation: F) -> Self {
        let sock = stream.stream.sock;

        // SAFETY: The future cannot outlive the borrow of the stream, and the stream keeps its
        // socket valid for as long as it exists.
        unsafe { Self::new(sock, mask, operation) }
    }

    /// Wait queue callback, called when the socket changes state.
    ///
    /// When the reported state overlaps with the one the future is waiting on, the stored waker
    /// (if any) is woken so that the future gets polled again. Returns 1 if a waker was woken
    /// and 0 otherwise.
    unsafe extern "C" fn wake_callback(
        wq_entry: *mut bindings::wait_queue_entry,
        _mode: core::ffi::c_uint,
        _flags: core::ffi::c_int,
        key: *mut core::ffi::c_void,
    ) -> core::ffi::c_int {
        // The key carries the bits describing the new socket state.
        let events = key as u32;

        // SAFETY: The entry is removed from the queue when the future is dropped, so the future
        // is still alive whenever this callback runs.
        //
        // A shared reference to `Self` is created here while `Self::poll` may be holding a
        // mutable (unique) one, which is a potential soundness issue. However, for `!Unpin`
        // types (like `Self`), `&mut T` is treated as `*mut T` per
        // <https://github.com/rust-lang/rust/issues/63818> -- so the unsoundness is avoided.
        // This should be revisited once a more definitive solution is available.
        let fut = unsafe { &*crate::container_of!(wq_entry, Self, wq_entry) };

        if (events & fut.mask) == 0 {
            // This notification is for a state the future is not waiting on.
            return 0;
        }

        // Failing to take the lock means the waker is being modified right now. The owner of
        // the lock gets told about this attempt, so it is the one that triggers the wake up.
        let guard = match fut.waker.try_lock() {
            Some(guard) => guard,
            None => return 0,
        };

        let cloned = match &*guard {
            Some(w) => w.clone(),
            // No waker has been stored, so there is nobody to wake up.
            None => return 0,
        };

        // Give up the lock before waking the task.
        drop(guard);
        cloned.wake();
        1
    }

    /// Attempts the operation a single time.
    ///
    /// An `EAGAIN` failure becomes [`Poll::Pending`]; any other outcome, success or error, is
    /// returned as [`Poll::Ready`].
    fn poll_once(self: Pin<&mut Self>) -> Poll<Result<Out>> {
        // SAFETY: Nothing is moved out of the future through this reference.
        let fut = unsafe { self.get_unchecked_mut() };

        match (fut.operation)() {
            Err(e) if e == EAGAIN => Poll::Pending,
            outcome => Poll::Ready(outcome),
        }
    }

    /// Replaces the waker stored in the future with a clone of `waker`.
    ///
    /// If this races with the reactor, a wake up is triggered right away.
    fn set_waker(&self, waker: &Waker) {
        let stored_quietly = match self.waker.try_lock() {
            Some(mut guard) => {
                let previous = core::mem::replace(&mut *guard, Some(waker.clone()));
                let contended = guard.unlock();
                // The previous waker is only released once the lock has been given up.
                drop(previous);
                !contended
            }
            None => false,
        };

        if !stored_quietly {
            // Either the lock was unavailable because the existing waker is being awakened, or
            // the reactor tried to take the lock while it was held here (contention). Waking
            // the task in both cases ensures that no notification is missed.
            waker.wake_by_ref();
        }
    }
}

impl<Out, F: FnMut() -> Result<Out> + Send> Future for SocketFuture<'_, Out, F> {
    type Output = Result<Out>;

    /// Attempts the operation and, if it cannot complete yet, arranges for the task to be woken.
    ///
    /// The operation is tried first; any result other than `EAGAIN` completes the future. When
    /// it is pending, the waker from `cx` is stored. On the first pending poll the wait queue
    /// entry is also initialised with `wake_callback` and added to the socket's wait queue,
    /// after which the operation is tried once more.
    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        match self.as_mut().poll_once() {
            Poll::Ready(r) => Poll::Ready(r),
            Poll::Pending => {
                // Store away the latest waker every time we may `Pending`.
                self.set_waker(cx.waker());
                if self.is_queued {
                    // Nothing else to do as the waiter is already queued.
                    return Poll::Pending;
                }

                // SAFETY: We never move out of `this`.
                let this = unsafe { self.as_mut().get_unchecked_mut() };

                // Record the registration so that it is not repeated on later polls and so that
                // `drop` knows the entry must be removed from the queue.
                this.is_queued = true;

                // SAFETY: `wq_entry` is valid for write.
                unsafe {
                    bindings::init_waitqueue_func_entry(
                        this.wq_entry.get(),
                        Some(Self::wake_callback),
                    )
                };

                // SAFETY: `wq_entry` was just initialised above and is valid for read/write.
                // By the type invariants, the socket is always valid.
                unsafe {
                    bindings::add_wait_queue(
                        core::ptr::addr_of_mut!((*this.sock).wq.wait),
                        this.wq_entry.get(),
                    )
                };

                // If the future wasn't queued yet, we need to poll again in case it reached
                // the desired state between the last poll and being queued (in which case we
                // would have missed the notification).
                self.poll_once()
            }
        }
    }
}

impl<Out, F: FnMut() -> Result<Out> + Send> Drop for SocketFuture<'_, Out, F> {
    fn drop(&mut self) {
        if !self.is_queued {
            return;
        }

        // SAFETY: `wq_entry` is initialised because `is_queued` is set to `true`, so it is valid
        // for read/write. By the type invariants, the socket is always valid.
        unsafe {
            bindings::remove_wait_queue(
                core::ptr::addr_of_mut!((*self.sock).wq.wait),
                self.wq_entry.get(),
            )
        };
    }
}