From 7d053bd62c863749dc24bfdc45e42854b647ca92 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Mon, 20 Apr 2020 14:59:17 +0200 Subject: [PATCH] linux56/57-tkg: Update fsync patchset to v3 - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 --- linux56-tkg/PKGBUILD | 4 +- .../linux56-tkg-patches/0007-v5.6-fsync.patch | 1127 ++++++++++++----- linux57-rc-tkg/PKGBUILD | 2 +- .../linux57-tkg-patches/0007-v5.7-fsync.patch | 1127 ++++++++++++----- 4 files changed, 1619 insertions(+), 641 deletions(-) diff --git a/linux56-tkg/PKGBUILD b/linux56-tkg/PKGBUILD index 39cf0bc..bd6aabf 100644 --- a/linux56-tkg/PKGBUILD +++ b/linux56-tkg/PKGBUILD @@ -89,7 +89,7 @@ pkgname=("${pkgbase}" "${pkgbase}-headers") _basekernel=5.6 _sub=5 pkgver="${_basekernel}"."${_sub}" -pkgrel=11 +pkgrel=12 pkgdesc='Linux-tkg' arch=('x86_64') # no i686 in here url="http://www.kernel.org/" @@ -137,7 +137,7 @@ sha256sums=('e342b04a2aa63808ea0ef1baab28fc520bd031ef8cf93d9ee4a31d4058fcb622' '7fd8e776209dac98627453fda754bdf9aff4a09f27cb0b3766d7983612eb3c74' '9ddfb1abaa01acf70e1352db1564fba591c2229d71d3c912213915388e944d6f' '90917e09bb06fbed6853efe9e52f8c2ba4066fca44accdf7608222212561104a' - '2d9260b80b43bbd605cf420d6bd53aa7262103dfd77196ba590ece5600b6dc0d' + 'cd225e86d72eaf6c31ef3d7b20df397f4cc44ddd04389850691292cdf292b204' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' '965a517a283f265a012545fbb5cc9e516efc9f6166d2aa1baf7293a32a1086b7' '2340925904efa3594cc65a7bae4fbff233d5d8bc7db605ce08acaca7450d2471' diff --git a/linux56-tkg/linux56-tkg-patches/0007-v5.6-fsync.patch b/linux56-tkg/linux56-tkg-patches/0007-v5.6-fsync.patch index 027116f..01c86d8 100644 --- a/linux56-tkg/linux56-tkg-patches/0007-v5.6-fsync.patch +++ b/linux56-tkg/linux56-tkg-patches/0007-v5.6-fsync.patch @@ -1,193 +1,44 @@ -split the futex key setup from the queue locking and key reading. This -is useful to support the setup of multiple keys at the same time, like -what is done in futex_requeue() and what will be done for the -FUTEX_WAIT_MULTIPLE command. - -Signed-off-by: Gabriel Krisman Bertazi ---- - kernel/futex.c | 71 +++++++++++++++++++++++++++++--------------------- - 1 file changed, 42 insertions(+), 29 deletions(-) - -diff --git a/kernel/futex.c b/kernel/futex.c -index 6d50728ef2e7..91f3db335c57 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2631,6 +2631,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - __set_current_state(TASK_RUNNING); - } - -+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, -+ struct futex_q *q, struct futex_hash_bucket **hb) -+{ -+ -+ u32 uval; -+ int ret; -+ -+retry_private: -+ *hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, uaddr); -+ -+ if (ret) { -+ queue_unlock(*hb); -+ -+ ret = get_user(uval, uaddr); -+ if (ret) -+ return ret; -+ -+ if (!(flags & FLAGS_SHARED)) -+ goto retry_private; -+ -+ return 1; -+ } -+ -+ if (uval != val) { -+ queue_unlock(*hb); -+ ret = -EWOULDBLOCK; -+ } -+ -+ return ret; -+} -+ - /** - * futex_wait_setup() - Prepare to wait on a futex - * @uaddr: the futex userspace address -@@ -2651,7 +2684,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - struct futex_q *q, struct futex_hash_bucket **hb) - { -- u32 uval; - int ret; - - /* -@@ -2672,38 +2704,19 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - * absorb a wakeup if *uaddr does not match the desired values - * while the syscall executes. - */ --retry: -- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); -- if (unlikely(ret != 0)) -- return ret; -- --retry_private: -- *hb = queue_lock(q); -+ do { -+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, -+ &q->key, FUTEX_READ); -+ if (unlikely(ret != 0)) -+ return ret; - -- ret = get_futex_value_locked(&uval, uaddr); -+ ret = __futex_wait_setup(uaddr, val, flags, q, hb); - -- if (ret) { -- queue_unlock(*hb); -- -- ret = get_user(uval, uaddr); -+ /* Drop key reference if retry or error. */ - if (ret) -- goto out; -+ put_futex_key(&q->key); -+ } while (ret > 0); - -- if (!(flags & FLAGS_SHARED)) -- goto retry_private; -- -- put_futex_key(&q->key); -- goto retry; -- } -- -- if (uval != val) { -- queue_unlock(*hb); -- ret = -EWOULDBLOCK; -- } -- --out: -- if (ret) -- put_futex_key(&q->key); - return ret; - } - --- -2.20.1 - -This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows -a thread to wait on several futexes at the same time, and be awoken by -any of them. In a sense, it implements one of the features that was -supported by pooling on the old FUTEX_FD interface. - -My use case for this operation lies in Wine, where we want to implement -a similar interface available in Windows, used mainly for event -handling. The wine folks have an implementation that uses eventfd, but -it suffers from FD exhaustion (I was told they have application that go -to the order of multi-milion FDs), and higher CPU utilization. - -In time, we are also proposing modifications to glibc and libpthread to -make this feature available for Linux native multithreaded applications -using libpthread, which can benefit from the behavior of waiting on any -of a group of futexes. - -In particular, using futexes in our Wine use case reduced the CPU -utilization by 4% for the game Beat Saber and by 1.5% for the game -Shadow of Tomb Raider, both running over Proton (a wine based solution -for Windows emulation), when compared to the eventfd interface. This -implementation also doesn't rely of file descriptors, so it doesn't risk -overflowing the resource. - -Technically, the existing FUTEX_WAIT implementation can be easily -reworked by using do_futex_wait_multiple with a count of one, and I -have a patch showing how it works. I'm not proposing it, since -futex is such a tricky code, that I'd be more confortable to have -FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles, -before considering modifying FUTEX_WAIT. - -From an implementation perspective, the futex list is passed as an array -of (pointer,value,bitset) to the kernel, which will enqueue all of them -and sleep if none was already triggered. It returns a hint of which -futex caused the wake up event to userspace, but the hint doesn't -guarantee that is the only futex triggered. Before calling the syscall -again, userspace should traverse the list, trying to re-acquire any of -the other futexes, to prevent an immediate -EWOULDBLOCK return code from -the kernel. - -This was tested using three mechanisms: - -1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and -running the unmodified tools/testing/selftests/futex and a full linux -distro on top of this kernel. - -2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a -multi-threaded, event-handling setup. - -3) By running the Wine fsync implementation and executing multi-threaded -applications, in particular the modern games mentioned above, on top of -this implementation. - -Signed-off-by: Zebediah Figura -Signed-off-by: Steven Noonan -Signed-off-by: Pierre-Loup A. Griffais -Signed-off-by: Gabriel Krisman Bertazi ---- - include/uapi/linux/futex.h | 7 ++ - kernel/futex.c | 161 ++++++++++++++++++++++++++++++++++++- - 2 files changed, 164 insertions(+), 4 deletions(-) +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 20 Apr 2020 14:09:11 +0200 +Subject: Import Fsync v3 patchset - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e..2401c4cf5095 100644 +index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -21,6 +21,7 @@ #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 31 ++#define FUTEX_WAIT_MULTIPLE 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 -@@ -150,4 +151,10 @@ struct robust_list_head { +@@ -40,6 +41,8 @@ + FUTEX_PRIVATE_FLAG) + #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) ++#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ ++ FUTEX_PRIVATE_FLAG) + + /* + * Support for robust futexes: the kernel cleans up held futexes at +@@ -150,4 +153,21 @@ struct robust_list_head { (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) ++/* ++ * Maximum number of multiple futexes to wait for ++ */ ++#define FUTEX_MULTIPLE_MAX_COUNT 128 ++ ++/** ++ * struct futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ +struct futex_wait_block { + __u32 __user *uaddr; + __u32 val; @@ -196,189 +47,314 @@ index a89eb0accd5e..2401c4cf5095 100644 + #endif /* _UAPI_LINUX_FUTEX_H */ diff --git a/kernel/futex.c b/kernel/futex.c -index 91f3db335c57..2623e8f152cd 100644 +index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -183,6 +183,7 @@ static int __read_mostly futex_cmpxchg_enabled; - #endif - #define FLAGS_CLOCKRT 0x02 - #define FLAGS_HAS_TIMEOUT 0x04 -+#define FLAGS_WAKE_MULTIPLE 0x08 +@@ -215,6 +215,8 @@ struct futex_pi_state { + * @rt_waiter: rt_waiter storage for use with requeue_pi + * @requeue_pi_key: the requeue_pi target futex key + * @bitset: bitset for the optional bitmasked wakeup ++ * @uaddr: userspace address of futex ++ * @uval: expected futex's value + * + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so + * we can wake only the relevant ones (hashed queues may be shared). +@@ -237,6 +239,8 @@ struct futex_q { + struct rt_mutex_waiter *rt_waiter; + union futex_key *requeue_pi_key; + u32 bitset; ++ u32 __user *uaddr; ++ u32 uval; + } __randomize_layout; - /* - * Priority Inheritance state: -@@ -2720,6 +2721,150 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + static const struct futex_q futex_q_init = { +@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) return ret; } -+static int do_futex_wait_multiple(struct futex_wait_block *wb, -+ u32 count, unsigned int flags, -+ ktime_t *abs_time) ++/** ++ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket ++ * @q: The list of futexes to unqueue ++ * @count: Number of futexes in the list ++ * ++ * Helper to unqueue a list of futexes. This can't fail. ++ * ++ * Return: ++ * - >=0 - Index of the last futex that was awoken; ++ * - -1 - If no futex was awoken ++ */ ++static int unqueue_multiple(struct futex_q *q, int count) +{ -+ -+ struct hrtimer_sleeper timeout, *to; -+ struct futex_hash_bucket *hb; -+ struct futex_q *qs = NULL; -+ int ret; ++ int ret = -1; + int i; + -+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL); -+ if (!qs) -+ return -ENOMEM; ++ for (i = 0; i < count; i++) { ++ if (!unqueue_me(&q[i])) ++ ret = i; ++ } ++ return ret; ++} + -+ to = futex_setup_timer(abs_time, &timeout, flags, -+ current->timer_slack_ns); -+ retry: + /* + * PI futexes can not be requeued and must remove themself from the + * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry +@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + return ret; + } + ++/** ++ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes ++ * @qs: The corresponding futex list ++ * @count: The size of the lists ++ * @flags: Futex flags (FLAGS_SHARED, etc.) ++ * @awaken: Index of the last awoken futex ++ * ++ * Prepare multiple futexes in a single step and enqueue them. This may fail if ++ * the futex list is invalid or if any futex was already awoken. On success the ++ * task is ready to interruptible sleep. ++ * ++ * Return: ++ * - 1 - One of the futexes was awaken by another thread ++ * - 0 - Success ++ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL ++ */ ++static int futex_wait_multiple_setup(struct futex_q *qs, int count, ++ unsigned int flags, int *awaken) ++{ ++ struct futex_hash_bucket *hb; ++ int ret, i; ++ u32 uval; ++ ++ /* ++ * Enqueuing multiple futexes is tricky, because we need to ++ * enqueue each futex in the list before dealing with the next ++ * one to avoid deadlocking on the hash bucket. But, before ++ * enqueuing, we need to make sure that current->state is ++ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which ++ * cannot be done before the get_futex_key of the next key, ++ * because it calls get_user_pages, which can sleep. Thus, we ++ * fetch the list of futexes keys in two steps, by first pinning ++ * all the memory keys in the futex key, and only then we read ++ * each key and queue the corresponding futex. ++ */ ++retry: + for (i = 0; i < count; i++) { + qs[i].key = FUTEX_KEY_INIT; -+ qs[i].bitset = wb[i].bitset; -+ -+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED, ++ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, + &qs[i].key, FUTEX_READ); -+ if (unlikely(ret != 0)) { ++ if (unlikely(ret)) { + for (--i; i >= 0; i--) + put_futex_key(&qs[i].key); -+ goto out; ++ return ret; + } + } + + set_current_state(TASK_INTERRUPTIBLE); + + for (i = 0; i < count; i++) { -+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val, -+ flags, &qs[i], &hb); -+ if (ret) { -+ /* Drop the failed key directly. keys 0..(i-1) -+ * will be put by unqueue_me. -+ */ -+ put_futex_key(&qs[i].key); ++ struct futex_q *q = &qs[i]; + -+ /* Undo the partial work we did. */ -+ for (--i; i >= 0; i--) -+ unqueue_me(&qs[i]); ++ hb = queue_lock(q); ++ ++ ret = get_futex_value_locked(&uval, q->uaddr); ++ if (ret) { ++ /* ++ * We need to try to handle the fault, which ++ * cannot be done without sleep, so we need to ++ * undo all the work already done, to make sure ++ * we don't miss any wake ups. Therefore, clean ++ * up, handle the fault and retry from the ++ * beginning. ++ */ ++ queue_unlock(hb); ++ ++ /* ++ * Keys 0..(i-1) are implicitly put ++ * on unqueue_multiple. ++ */ ++ put_futex_key(&q->key); ++ ++ *awaken = unqueue_multiple(qs, i); + + __set_current_state(TASK_RUNNING); -+ if (ret > 0) -+ goto retry; -+ goto out; ++ ++ /* ++ * On a real fault, prioritize the error even if ++ * some other futex was awoken. Userspace gave ++ * us a bad address, -EFAULT them. ++ */ ++ ret = get_user(uval, q->uaddr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Even if the page fault was handled, If ++ * something was already awaken, we can safely ++ * give up and succeed to give a hint for userspace to ++ * acquire the right futex faster. ++ */ ++ if (*awaken >= 0) ++ return 1; ++ ++ goto retry; + } + -+ /* We can't hold to the bucket lock when dealing with -+ * the next futex. Queue ourselves now so we can unlock -+ * it before moving on. ++ if (uval != q->uval) { ++ queue_unlock(hb); ++ ++ put_futex_key(&qs[i].key); ++ ++ /* ++ * If something was already awaken, we can ++ * safely ignore the error and succeed. ++ */ ++ *awaken = unqueue_multiple(qs, i); ++ __set_current_state(TASK_RUNNING); ++ if (*awaken >= 0) ++ return 1; ++ ++ return -EWOULDBLOCK; ++ } ++ ++ /* ++ * The bucket lock can't be held while dealing with the ++ * next futex. Queue each futex at this moment so hb can ++ * be unlocked. + */ + queue_me(&qs[i], hb); + } ++ return 0; ++} + -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ++/** ++ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes ++ * @qs: The list of futexes to wait on ++ * @op: Operation code from futex's syscall ++ * @count: The number of objects ++ * @abs_time: Timeout before giving up and returning to userspace ++ * ++ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function ++ * sleeps on a group of futexes and returns on the first futex that ++ * triggered, or after the timeout has elapsed. ++ * ++ * Return: ++ * - >=0 - Hint to the futex that was awoken ++ * - <0 - On error ++ */ ++static int futex_wait_multiple(struct futex_q *qs, int op, ++ u32 count, ktime_t *abs_time) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ int ret, flags = 0, hint = 0; ++ unsigned int i; + -+ /* There is no easy to way to check if we are wake already on -+ * multiple futexes without waking through each one of them. So -+ * just sleep and let the scheduler handle it. -+ */ -+ if (!to || to->task) -+ freezable_schedule(); ++ if (!(op & FUTEX_PRIVATE_FLAG)) ++ flags |= FLAGS_SHARED; + -+ __set_current_state(TASK_RUNNING); ++ if (op & FUTEX_CLOCK_REALTIME) ++ flags |= FLAGS_CLOCKRT; + -+ ret = -ETIMEDOUT; -+ /* If we were woken (and unqueued), we succeeded. */ -+ for (i = 0; i < count; i++) -+ if (!unqueue_me(&qs[i])) -+ ret = i; ++ to = futex_setup_timer(abs_time, &timeout, flags, 0); ++ while (1) { ++ ret = futex_wait_multiple_setup(qs, count, flags, &hint); ++ if (ret) { ++ if (ret > 0) { ++ /* A futex was awaken during setup */ ++ ret = hint; ++ } ++ break; ++ } + -+ /* Succeed wakeup */ -+ if (ret >= 0) -+ goto out; ++ if (to) ++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + -+ /* Woken by triggered timeout */ -+ if (to && !to->task) -+ goto out; ++ /* ++ * Avoid sleeping if another thread already tried to ++ * wake us. ++ */ ++ for (i = 0; i < count; i++) { ++ if (plist_node_empty(&qs[i].list)) ++ break; ++ } + -+ /* -+ * We expect signal_pending(current), but we might be the -+ * victim of a spurious wakeup as well. -+ */ -+ if (!signal_pending(current)) -+ goto retry; ++ if (i == count && (!to || to->task)) ++ freezable_schedule(); + -+ ret = -ERESTARTSYS; -+ if (!abs_time) -+ goto out; ++ ret = unqueue_multiple(qs, count); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ if (ret >= 0) ++ break; ++ if (to && !to->task) { ++ ret = -ETIMEDOUT; ++ break; ++ } else if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ /* ++ * The final case is a spurious wakeup, for ++ * which just retry. ++ */ ++ } + -+ ret = -ERESTART_RESTARTBLOCK; -+ out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + -+ kfree(qs); -+ return ret; -+} -+ -+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags, -+ u32 count, ktime_t *abs_time) -+{ -+ struct futex_wait_block *wb; -+ struct restart_block *restart; -+ int ret; -+ -+ if (!count) -+ return -EINVAL; -+ -+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL); -+ if (!wb) -+ return -ENOMEM; -+ -+ if (copy_from_user(wb, uaddr, -+ count * sizeof(struct futex_wait_block))) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ ret = do_futex_wait_multiple(wb, count, flags, abs_time); -+ -+ if (ret == -ERESTART_RESTARTBLOCK) { -+ restart = ¤t->restart_block; -+ restart->fn = futex_wait_restart; -+ restart->futex.uaddr = uaddr; -+ restart->futex.val = count; -+ restart->futex.time = *abs_time; -+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT | -+ FLAGS_WAKE_MULTIPLE); -+ } -+ -+out: -+ kfree(wb); + return ret; +} + static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { -@@ -2797,6 +2942,10 @@ static long futex_wait_restart(struct restart_block *restart) - } - restart->fn = do_no_restart_syscall; - -+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE) -+ return (long)futex_wait_multiple(uaddr, restart->futex.flags, -+ restart->futex.val, tp); -+ - return (long)futex_wait(uaddr, restart->futex.flags, - restart->futex.val, tp, restart->futex.bitset); - } -@@ -3680,6 +3829,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - uaddr2); - case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); -+ case FUTEX_WAIT_MULTIPLE: -+ return futex_wait_multiple(uaddr, flags, val, timeout); - } +@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } -@@ -3696,7 +3847,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + ++/** ++ * futex_read_wait_block - Read an array of futex_wait_block from userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function creates and allocate an array of futex_q (we zero it to ++ * initialize the fields) and then, for each futex_wait_block element from ++ * userspace, fill a futex_q element with proper values. ++ */ ++inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct futex_wait_block fwb; ++ struct futex_wait_block __user *entry = ++ (struct futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = fwb.uaddr; ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} + + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, +@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || @@ -388,16 +364,91 @@ index 91f3db335c57..2623e8f152cd 100644 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; if (get_timespec64(&ts, utime)) -@@ -3705,7 +3857,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; +@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (u32) (unsigned long) utime; - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -3889,14 +4041,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs; ++ ++#ifdef CONFIG_X86_X32 ++ if (unlikely(in_x32_syscall())) ++ return -ENOSYS; ++#endif ++ qs = futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + +@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + #endif /* CONFIG_COMPAT */ + + #ifdef CONFIG_COMPAT_32BIT_TIME ++/** ++ * struct compat_futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex (compatible pointer) ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct compat_futex_wait_block { ++ compat_uptr_t uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ ++/** ++ * compat_futex_read_wait_block - Read an array of futex_wait_block from ++ * userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function does the same as futex_read_wait_block(), except that it ++ * converts the pointer to the futex from the compat version to the regular one. ++ */ ++inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, ++ u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct compat_futex_wait_block fwb; ++ struct compat_futex_wait_block __user *entry = ++ (struct compat_futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = compat_ptr(fwb.uaddr); ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} ++ + SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || @@ -407,6 +458,446 @@ index 91f3db335c57..2623e8f152cd 100644 if (get_old_timespec32(&ts, utime)) return -EFAULT; if (!timespec64_valid(&ts)) +@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + #endif /* CONFIG_COMPAT_32BIT_TIME */ +diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +index ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +@@ -11,6 +11,7 @@ + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) + { + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; ++ time_t secs; ++ struct futex_wait_block fwb = {&f1, f1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); +@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait timeout succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); ++ ++ /* Setup absolute time */ ++ ret = clock_gettime(CLOCK_REALTIME, &to); ++ secs = (to.tv_nsec + timeout_ns) / 1000000000; ++ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; ++ to.tv_sec += secs; ++ info("to.tv_sec = %ld\n", to.tv_sec); ++ info("to.tv_nsec = %ld\n", to.tv_nsec); ++ ++ res = futex_wait_multiple(&fwb, 1, &to, ++ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); ++ ++#ifdef __ILP32__ ++ if (res == -1 && errno == ENOSYS) { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } else { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; + } ++#else ++ if (!res || errno != ETIMEDOUT) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h +index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 +--- a/tools/testing/selftests/futex/include/futextest.h ++++ b/tools/testing/selftests/futex/include/futextest.h +@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; + #ifndef FUTEX_CMP_REQUEUE_PI + #define FUTEX_CMP_REQUEUE_PI 12 + #endif ++#ifndef FUTEX_WAIT_MULTIPLE ++#define FUTEX_WAIT_MULTIPLE 13 ++struct futex_wait_block { ++ futex_t *uaddr; ++ futex_t val; ++ __u32 bitset; ++}; ++#endif + #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE + #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + } + ++/** ++ * futex_wait_multiple() - block on several futexes with optional timeout ++ * @fwb: wait block user space address ++ * @count: number of entities at fwb ++ * @timeout: absolute timeout ++ */ ++static inline int ++futex_wait_multiple(struct futex_wait_block *fwb, int count, ++ struct timespec *timeout, int opflags) ++{ ++ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, ++ opflags); ++} ++ + /** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks +diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +index 0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +@@ -12,6 +12,7 @@ + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) + { + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb = {&f1, f1+1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + +@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", ++ f1, &f1, f1+1); ++ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (!res || errno != EWOULDBLOCK) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; + } ++ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore +index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 +--- a/tools/testing/selftests/futex/functional/.gitignore ++++ b/tools/testing/selftests/futex/functional/.gitignore +@@ -5,3 +5,4 @@ futex_wait_private_mapped_file + futex_wait_timeout + futex_wait_uninitialized_heap + futex_wait_wouldblock ++futex_wait_multiple +diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile +index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 +--- a/tools/testing/selftests/futex/functional/Makefile ++++ b/tools/testing/selftests/futex/functional/Makefile +@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ +- futex_wait_private_mapped_file ++ futex_wait_private_mapped_file \ ++ futex_wait_multiple + + TEST_PROGS := run.sh + +diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +new file mode 100644 +index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d +--- /dev/null ++++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +@@ -0,0 +1,173 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/****************************************************************************** ++ * ++ * Copyright © Collabora, Ltd., 2019 ++ * ++ * DESCRIPTION ++ * Test basic semantics of FUTEX_WAIT_MULTIPLE ++ * ++ * AUTHOR ++ * Gabriel Krisman Bertazi ++ * ++ * HISTORY ++ * 2019-Dec-13: Initial version by Krisman ++ * ++ *****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "futextest.h" ++#include "logging.h" ++ ++#define TEST_NAME "futex-wait-multiple" ++#define timeout_ns 100000 ++#define MAX_COUNT 128 ++#define WAKE_WAIT_US 3000000 ++ ++int ret = RET_PASS; ++char *progname; ++futex_t f[MAX_COUNT] = {0}; ++struct futex_wait_block fwb[MAX_COUNT]; ++ ++void usage(char *prog) ++{ ++ printf("Usage: %s\n", prog); ++ printf(" -c Use color\n"); ++ printf(" -h Display this help message\n"); ++ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", ++ VQUIET, VCRITICAL, VINFO); ++} ++ ++void test_count_overflow(void) ++{ ++ futex_t f = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb[MAX_COUNT+1]; ++ int res, i; ++ ++ ksft_print_msg("%s: Test a too big number of futexes\n", progname); ++ ++ for (i = 0; i < MAX_COUNT+1; i++) { ++ fwb[i].uaddr = &f; ++ fwb[i].val = f; ++ fwb[i].bitset = 0; ++ } ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res != -1 || errno != EINVAL) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); ++ } ++ ++#endif /* __ILP32__ */ ++} ++ ++void *waiterfn(void *arg) ++{ ++ int res; ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res < 0) ++ ksft_print_msg("waiter failed %d\n", res); ++ ++ info("futex_wait_multiple: Got hint futex %d was freed\n", res); ++#endif /* __ILP32__ */ ++ ++ return NULL; ++} ++ ++void test_fwb_wakeup(void) ++{ ++ int res, i; ++ pthread_t waiter; ++ ++ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); ++ ++ for (i = 0; i < MAX_COUNT; i++) { ++ fwb[i].uaddr = &f[i]; ++ fwb[i].val = f[i]; ++ fwb[i].bitset = 0xffffffff; ++ } ++ ++ res = pthread_create(&waiter, NULL, waiterfn, NULL); ++ if (res) { ++ ksft_test_result_fail("Creating waiting thread failed"); ++ ksft_exit_fail(); ++ } ++ ++ usleep(WAKE_WAIT_US); ++ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); ++ if (res != 1) { ++ ksft_test_result_fail("Failed to wake thread res=%d\n", res); ++ ksft_exit_fail(); ++ } ++ ++ pthread_join(waiter, NULL); ++ ksft_test_result_pass("%s succeed\n", __func__); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int c; ++ ++ while ((c = getopt(argc, argv, "cht:v:")) != -1) { ++ switch (c) { ++ case 'c': ++ log_color(1); ++ break; ++ case 'h': ++ usage(basename(argv[0])); ++ exit(0); ++ case 'v': ++ log_verbosity(atoi(optarg)); ++ break; ++ default: ++ usage(basename(argv[0])); ++ exit(1); ++ } ++ } ++ ++ progname = basename(argv[0]); ++ ++ ksft_print_header(); ++ ksft_set_plan(2); ++ ++ test_count_overflow(); ++ ++#ifdef __ILP32__ ++ // if it's a 32x binary, there's no futex to wakeup ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++#else ++ test_fwb_wakeup(); ++#endif /* __ILP32__ */ ++ ++ ksft_print_cnts(); ++ return ret; ++} +diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh +index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 +--- a/tools/testing/selftests/futex/functional/run.sh ++++ b/tools/testing/selftests/futex/functional/run.sh +@@ -73,3 +73,6 @@ echo + echo + ./futex_wait_uninitialized_heap $COLOR + ./futex_wait_private_mapped_file $COLOR ++ ++echo ++./futex_wait_multiple $COLOR +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index 580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,7 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 +-#define FUTEX_WAIT_MULTIPLE 13 ++#define FUTEX_WAIT_MULTIPLE 31 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +diff --git a/kernel/futex.c b/kernel/futex.c +index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } +@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + */ + struct compat_futex_wait_block { + compat_uptr_t uaddr; ++ __u32 pad; + __u32 val; + __u32 bitset; + }; +@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, return -EINVAL; t = timespec64_to_ktime(ts); @@ -415,5 +906,3 @@ index 91f3db335c57..2623e8f152cd 100644 t = ktime_add_safe(ktime_get(), t); tp = &t; } --- -2.20.1 diff --git a/linux57-rc-tkg/PKGBUILD b/linux57-rc-tkg/PKGBUILD index 73761f7..a697ce2 100644 --- a/linux57-rc-tkg/PKGBUILD +++ b/linux57-rc-tkg/PKGBUILD @@ -128,7 +128,7 @@ sha256sums=('7a5369e141ec8d6c139a9357bf9a4e668bac7364e4bd96e9fafe11e462a6071a' '7fd8e776209dac98627453fda754bdf9aff4a09f27cb0b3766d7983612eb3c74' '3767c745aed00798efcdc6c57dcdc5ca84573863a9b76c1461eda15ff6f62037' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - '2d9260b80b43bbd605cf420d6bd53aa7262103dfd77196ba590ece5600b6dc0d' + 'cd225e86d72eaf6c31ef3d7b20df397f4cc44ddd04389850691292cdf292b204' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' '965a517a283f265a012545fbb5cc9e516efc9f6166d2aa1baf7293a32a1086b7' '5d95eac8dd9a5866f943d47d155f48d52dfd1218ffa19f95548fb20c1a54a90e' diff --git a/linux57-rc-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch b/linux57-rc-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch index 027116f..01c86d8 100644 --- a/linux57-rc-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch +++ b/linux57-rc-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch @@ -1,193 +1,44 @@ -split the futex key setup from the queue locking and key reading. This -is useful to support the setup of multiple keys at the same time, like -what is done in futex_requeue() and what will be done for the -FUTEX_WAIT_MULTIPLE command. - -Signed-off-by: Gabriel Krisman Bertazi ---- - kernel/futex.c | 71 +++++++++++++++++++++++++++++--------------------- - 1 file changed, 42 insertions(+), 29 deletions(-) - -diff --git a/kernel/futex.c b/kernel/futex.c -index 6d50728ef2e7..91f3db335c57 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2631,6 +2631,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - __set_current_state(TASK_RUNNING); - } - -+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, -+ struct futex_q *q, struct futex_hash_bucket **hb) -+{ -+ -+ u32 uval; -+ int ret; -+ -+retry_private: -+ *hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, uaddr); -+ -+ if (ret) { -+ queue_unlock(*hb); -+ -+ ret = get_user(uval, uaddr); -+ if (ret) -+ return ret; -+ -+ if (!(flags & FLAGS_SHARED)) -+ goto retry_private; -+ -+ return 1; -+ } -+ -+ if (uval != val) { -+ queue_unlock(*hb); -+ ret = -EWOULDBLOCK; -+ } -+ -+ return ret; -+} -+ - /** - * futex_wait_setup() - Prepare to wait on a futex - * @uaddr: the futex userspace address -@@ -2651,7 +2684,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - struct futex_q *q, struct futex_hash_bucket **hb) - { -- u32 uval; - int ret; - - /* -@@ -2672,38 +2704,19 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - * absorb a wakeup if *uaddr does not match the desired values - * while the syscall executes. - */ --retry: -- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); -- if (unlikely(ret != 0)) -- return ret; -- --retry_private: -- *hb = queue_lock(q); -+ do { -+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, -+ &q->key, FUTEX_READ); -+ if (unlikely(ret != 0)) -+ return ret; - -- ret = get_futex_value_locked(&uval, uaddr); -+ ret = __futex_wait_setup(uaddr, val, flags, q, hb); - -- if (ret) { -- queue_unlock(*hb); -- -- ret = get_user(uval, uaddr); -+ /* Drop key reference if retry or error. */ - if (ret) -- goto out; -+ put_futex_key(&q->key); -+ } while (ret > 0); - -- if (!(flags & FLAGS_SHARED)) -- goto retry_private; -- -- put_futex_key(&q->key); -- goto retry; -- } -- -- if (uval != val) { -- queue_unlock(*hb); -- ret = -EWOULDBLOCK; -- } -- --out: -- if (ret) -- put_futex_key(&q->key); - return ret; - } - --- -2.20.1 - -This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows -a thread to wait on several futexes at the same time, and be awoken by -any of them. In a sense, it implements one of the features that was -supported by pooling on the old FUTEX_FD interface. - -My use case for this operation lies in Wine, where we want to implement -a similar interface available in Windows, used mainly for event -handling. The wine folks have an implementation that uses eventfd, but -it suffers from FD exhaustion (I was told they have application that go -to the order of multi-milion FDs), and higher CPU utilization. - -In time, we are also proposing modifications to glibc and libpthread to -make this feature available for Linux native multithreaded applications -using libpthread, which can benefit from the behavior of waiting on any -of a group of futexes. - -In particular, using futexes in our Wine use case reduced the CPU -utilization by 4% for the game Beat Saber and by 1.5% for the game -Shadow of Tomb Raider, both running over Proton (a wine based solution -for Windows emulation), when compared to the eventfd interface. This -implementation also doesn't rely of file descriptors, so it doesn't risk -overflowing the resource. - -Technically, the existing FUTEX_WAIT implementation can be easily -reworked by using do_futex_wait_multiple with a count of one, and I -have a patch showing how it works. I'm not proposing it, since -futex is such a tricky code, that I'd be more confortable to have -FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles, -before considering modifying FUTEX_WAIT. - -From an implementation perspective, the futex list is passed as an array -of (pointer,value,bitset) to the kernel, which will enqueue all of them -and sleep if none was already triggered. It returns a hint of which -futex caused the wake up event to userspace, but the hint doesn't -guarantee that is the only futex triggered. Before calling the syscall -again, userspace should traverse the list, trying to re-acquire any of -the other futexes, to prevent an immediate -EWOULDBLOCK return code from -the kernel. - -This was tested using three mechanisms: - -1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and -running the unmodified tools/testing/selftests/futex and a full linux -distro on top of this kernel. - -2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a -multi-threaded, event-handling setup. - -3) By running the Wine fsync implementation and executing multi-threaded -applications, in particular the modern games mentioned above, on top of -this implementation. - -Signed-off-by: Zebediah Figura -Signed-off-by: Steven Noonan -Signed-off-by: Pierre-Loup A. Griffais -Signed-off-by: Gabriel Krisman Bertazi ---- - include/uapi/linux/futex.h | 7 ++ - kernel/futex.c | 161 ++++++++++++++++++++++++++++++++++++- - 2 files changed, 164 insertions(+), 4 deletions(-) +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 20 Apr 2020 14:09:11 +0200 +Subject: Import Fsync v3 patchset - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e..2401c4cf5095 100644 +index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -21,6 +21,7 @@ #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 31 ++#define FUTEX_WAIT_MULTIPLE 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 -@@ -150,4 +151,10 @@ struct robust_list_head { +@@ -40,6 +41,8 @@ + FUTEX_PRIVATE_FLAG) + #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) ++#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ ++ FUTEX_PRIVATE_FLAG) + + /* + * Support for robust futexes: the kernel cleans up held futexes at +@@ -150,4 +153,21 @@ struct robust_list_head { (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) ++/* ++ * Maximum number of multiple futexes to wait for ++ */ ++#define FUTEX_MULTIPLE_MAX_COUNT 128 ++ ++/** ++ * struct futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ +struct futex_wait_block { + __u32 __user *uaddr; + __u32 val; @@ -196,189 +47,314 @@ index a89eb0accd5e..2401c4cf5095 100644 + #endif /* _UAPI_LINUX_FUTEX_H */ diff --git a/kernel/futex.c b/kernel/futex.c -index 91f3db335c57..2623e8f152cd 100644 +index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -183,6 +183,7 @@ static int __read_mostly futex_cmpxchg_enabled; - #endif - #define FLAGS_CLOCKRT 0x02 - #define FLAGS_HAS_TIMEOUT 0x04 -+#define FLAGS_WAKE_MULTIPLE 0x08 +@@ -215,6 +215,8 @@ struct futex_pi_state { + * @rt_waiter: rt_waiter storage for use with requeue_pi + * @requeue_pi_key: the requeue_pi target futex key + * @bitset: bitset for the optional bitmasked wakeup ++ * @uaddr: userspace address of futex ++ * @uval: expected futex's value + * + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so + * we can wake only the relevant ones (hashed queues may be shared). +@@ -237,6 +239,8 @@ struct futex_q { + struct rt_mutex_waiter *rt_waiter; + union futex_key *requeue_pi_key; + u32 bitset; ++ u32 __user *uaddr; ++ u32 uval; + } __randomize_layout; - /* - * Priority Inheritance state: -@@ -2720,6 +2721,150 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + static const struct futex_q futex_q_init = { +@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) return ret; } -+static int do_futex_wait_multiple(struct futex_wait_block *wb, -+ u32 count, unsigned int flags, -+ ktime_t *abs_time) ++/** ++ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket ++ * @q: The list of futexes to unqueue ++ * @count: Number of futexes in the list ++ * ++ * Helper to unqueue a list of futexes. This can't fail. ++ * ++ * Return: ++ * - >=0 - Index of the last futex that was awoken; ++ * - -1 - If no futex was awoken ++ */ ++static int unqueue_multiple(struct futex_q *q, int count) +{ -+ -+ struct hrtimer_sleeper timeout, *to; -+ struct futex_hash_bucket *hb; -+ struct futex_q *qs = NULL; -+ int ret; ++ int ret = -1; + int i; + -+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL); -+ if (!qs) -+ return -ENOMEM; ++ for (i = 0; i < count; i++) { ++ if (!unqueue_me(&q[i])) ++ ret = i; ++ } ++ return ret; ++} + -+ to = futex_setup_timer(abs_time, &timeout, flags, -+ current->timer_slack_ns); -+ retry: + /* + * PI futexes can not be requeued and must remove themself from the + * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry +@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + return ret; + } + ++/** ++ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes ++ * @qs: The corresponding futex list ++ * @count: The size of the lists ++ * @flags: Futex flags (FLAGS_SHARED, etc.) ++ * @awaken: Index of the last awoken futex ++ * ++ * Prepare multiple futexes in a single step and enqueue them. This may fail if ++ * the futex list is invalid or if any futex was already awoken. On success the ++ * task is ready to interruptible sleep. ++ * ++ * Return: ++ * - 1 - One of the futexes was awaken by another thread ++ * - 0 - Success ++ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL ++ */ ++static int futex_wait_multiple_setup(struct futex_q *qs, int count, ++ unsigned int flags, int *awaken) ++{ ++ struct futex_hash_bucket *hb; ++ int ret, i; ++ u32 uval; ++ ++ /* ++ * Enqueuing multiple futexes is tricky, because we need to ++ * enqueue each futex in the list before dealing with the next ++ * one to avoid deadlocking on the hash bucket. But, before ++ * enqueuing, we need to make sure that current->state is ++ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which ++ * cannot be done before the get_futex_key of the next key, ++ * because it calls get_user_pages, which can sleep. Thus, we ++ * fetch the list of futexes keys in two steps, by first pinning ++ * all the memory keys in the futex key, and only then we read ++ * each key and queue the corresponding futex. ++ */ ++retry: + for (i = 0; i < count; i++) { + qs[i].key = FUTEX_KEY_INIT; -+ qs[i].bitset = wb[i].bitset; -+ -+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED, ++ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, + &qs[i].key, FUTEX_READ); -+ if (unlikely(ret != 0)) { ++ if (unlikely(ret)) { + for (--i; i >= 0; i--) + put_futex_key(&qs[i].key); -+ goto out; ++ return ret; + } + } + + set_current_state(TASK_INTERRUPTIBLE); + + for (i = 0; i < count; i++) { -+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val, -+ flags, &qs[i], &hb); -+ if (ret) { -+ /* Drop the failed key directly. keys 0..(i-1) -+ * will be put by unqueue_me. -+ */ -+ put_futex_key(&qs[i].key); ++ struct futex_q *q = &qs[i]; + -+ /* Undo the partial work we did. */ -+ for (--i; i >= 0; i--) -+ unqueue_me(&qs[i]); ++ hb = queue_lock(q); ++ ++ ret = get_futex_value_locked(&uval, q->uaddr); ++ if (ret) { ++ /* ++ * We need to try to handle the fault, which ++ * cannot be done without sleep, so we need to ++ * undo all the work already done, to make sure ++ * we don't miss any wake ups. Therefore, clean ++ * up, handle the fault and retry from the ++ * beginning. ++ */ ++ queue_unlock(hb); ++ ++ /* ++ * Keys 0..(i-1) are implicitly put ++ * on unqueue_multiple. ++ */ ++ put_futex_key(&q->key); ++ ++ *awaken = unqueue_multiple(qs, i); + + __set_current_state(TASK_RUNNING); -+ if (ret > 0) -+ goto retry; -+ goto out; ++ ++ /* ++ * On a real fault, prioritize the error even if ++ * some other futex was awoken. Userspace gave ++ * us a bad address, -EFAULT them. ++ */ ++ ret = get_user(uval, q->uaddr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Even if the page fault was handled, If ++ * something was already awaken, we can safely ++ * give up and succeed to give a hint for userspace to ++ * acquire the right futex faster. ++ */ ++ if (*awaken >= 0) ++ return 1; ++ ++ goto retry; + } + -+ /* We can't hold to the bucket lock when dealing with -+ * the next futex. Queue ourselves now so we can unlock -+ * it before moving on. ++ if (uval != q->uval) { ++ queue_unlock(hb); ++ ++ put_futex_key(&qs[i].key); ++ ++ /* ++ * If something was already awaken, we can ++ * safely ignore the error and succeed. ++ */ ++ *awaken = unqueue_multiple(qs, i); ++ __set_current_state(TASK_RUNNING); ++ if (*awaken >= 0) ++ return 1; ++ ++ return -EWOULDBLOCK; ++ } ++ ++ /* ++ * The bucket lock can't be held while dealing with the ++ * next futex. Queue each futex at this moment so hb can ++ * be unlocked. + */ + queue_me(&qs[i], hb); + } ++ return 0; ++} + -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ++/** ++ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes ++ * @qs: The list of futexes to wait on ++ * @op: Operation code from futex's syscall ++ * @count: The number of objects ++ * @abs_time: Timeout before giving up and returning to userspace ++ * ++ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function ++ * sleeps on a group of futexes and returns on the first futex that ++ * triggered, or after the timeout has elapsed. ++ * ++ * Return: ++ * - >=0 - Hint to the futex that was awoken ++ * - <0 - On error ++ */ ++static int futex_wait_multiple(struct futex_q *qs, int op, ++ u32 count, ktime_t *abs_time) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ int ret, flags = 0, hint = 0; ++ unsigned int i; + -+ /* There is no easy to way to check if we are wake already on -+ * multiple futexes without waking through each one of them. So -+ * just sleep and let the scheduler handle it. -+ */ -+ if (!to || to->task) -+ freezable_schedule(); ++ if (!(op & FUTEX_PRIVATE_FLAG)) ++ flags |= FLAGS_SHARED; + -+ __set_current_state(TASK_RUNNING); ++ if (op & FUTEX_CLOCK_REALTIME) ++ flags |= FLAGS_CLOCKRT; + -+ ret = -ETIMEDOUT; -+ /* If we were woken (and unqueued), we succeeded. */ -+ for (i = 0; i < count; i++) -+ if (!unqueue_me(&qs[i])) -+ ret = i; ++ to = futex_setup_timer(abs_time, &timeout, flags, 0); ++ while (1) { ++ ret = futex_wait_multiple_setup(qs, count, flags, &hint); ++ if (ret) { ++ if (ret > 0) { ++ /* A futex was awaken during setup */ ++ ret = hint; ++ } ++ break; ++ } + -+ /* Succeed wakeup */ -+ if (ret >= 0) -+ goto out; ++ if (to) ++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + -+ /* Woken by triggered timeout */ -+ if (to && !to->task) -+ goto out; ++ /* ++ * Avoid sleeping if another thread already tried to ++ * wake us. ++ */ ++ for (i = 0; i < count; i++) { ++ if (plist_node_empty(&qs[i].list)) ++ break; ++ } + -+ /* -+ * We expect signal_pending(current), but we might be the -+ * victim of a spurious wakeup as well. -+ */ -+ if (!signal_pending(current)) -+ goto retry; ++ if (i == count && (!to || to->task)) ++ freezable_schedule(); + -+ ret = -ERESTARTSYS; -+ if (!abs_time) -+ goto out; ++ ret = unqueue_multiple(qs, count); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ if (ret >= 0) ++ break; ++ if (to && !to->task) { ++ ret = -ETIMEDOUT; ++ break; ++ } else if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ /* ++ * The final case is a spurious wakeup, for ++ * which just retry. ++ */ ++ } + -+ ret = -ERESTART_RESTARTBLOCK; -+ out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + -+ kfree(qs); -+ return ret; -+} -+ -+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags, -+ u32 count, ktime_t *abs_time) -+{ -+ struct futex_wait_block *wb; -+ struct restart_block *restart; -+ int ret; -+ -+ if (!count) -+ return -EINVAL; -+ -+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL); -+ if (!wb) -+ return -ENOMEM; -+ -+ if (copy_from_user(wb, uaddr, -+ count * sizeof(struct futex_wait_block))) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ ret = do_futex_wait_multiple(wb, count, flags, abs_time); -+ -+ if (ret == -ERESTART_RESTARTBLOCK) { -+ restart = ¤t->restart_block; -+ restart->fn = futex_wait_restart; -+ restart->futex.uaddr = uaddr; -+ restart->futex.val = count; -+ restart->futex.time = *abs_time; -+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT | -+ FLAGS_WAKE_MULTIPLE); -+ } -+ -+out: -+ kfree(wb); + return ret; +} + static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { -@@ -2797,6 +2942,10 @@ static long futex_wait_restart(struct restart_block *restart) - } - restart->fn = do_no_restart_syscall; - -+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE) -+ return (long)futex_wait_multiple(uaddr, restart->futex.flags, -+ restart->futex.val, tp); -+ - return (long)futex_wait(uaddr, restart->futex.flags, - restart->futex.val, tp, restart->futex.bitset); - } -@@ -3680,6 +3829,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - uaddr2); - case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); -+ case FUTEX_WAIT_MULTIPLE: -+ return futex_wait_multiple(uaddr, flags, val, timeout); - } +@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } -@@ -3696,7 +3847,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + ++/** ++ * futex_read_wait_block - Read an array of futex_wait_block from userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function creates and allocate an array of futex_q (we zero it to ++ * initialize the fields) and then, for each futex_wait_block element from ++ * userspace, fill a futex_q element with proper values. ++ */ ++inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct futex_wait_block fwb; ++ struct futex_wait_block __user *entry = ++ (struct futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = fwb.uaddr; ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} + + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, +@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || @@ -388,16 +364,91 @@ index 91f3db335c57..2623e8f152cd 100644 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; if (get_timespec64(&ts, utime)) -@@ -3705,7 +3857,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; +@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (u32) (unsigned long) utime; - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -3889,14 +4041,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs; ++ ++#ifdef CONFIG_X86_X32 ++ if (unlikely(in_x32_syscall())) ++ return -ENOSYS; ++#endif ++ qs = futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + +@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + #endif /* CONFIG_COMPAT */ + + #ifdef CONFIG_COMPAT_32BIT_TIME ++/** ++ * struct compat_futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex (compatible pointer) ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct compat_futex_wait_block { ++ compat_uptr_t uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ ++/** ++ * compat_futex_read_wait_block - Read an array of futex_wait_block from ++ * userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function does the same as futex_read_wait_block(), except that it ++ * converts the pointer to the futex from the compat version to the regular one. ++ */ ++inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, ++ u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct compat_futex_wait_block fwb; ++ struct compat_futex_wait_block __user *entry = ++ (struct compat_futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = compat_ptr(fwb.uaddr); ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} ++ + SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || @@ -407,6 +458,446 @@ index 91f3db335c57..2623e8f152cd 100644 if (get_old_timespec32(&ts, utime)) return -EFAULT; if (!timespec64_valid(&ts)) +@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + #endif /* CONFIG_COMPAT_32BIT_TIME */ +diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +index ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +@@ -11,6 +11,7 @@ + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) + { + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; ++ time_t secs; ++ struct futex_wait_block fwb = {&f1, f1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); +@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait timeout succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); ++ ++ /* Setup absolute time */ ++ ret = clock_gettime(CLOCK_REALTIME, &to); ++ secs = (to.tv_nsec + timeout_ns) / 1000000000; ++ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; ++ to.tv_sec += secs; ++ info("to.tv_sec = %ld\n", to.tv_sec); ++ info("to.tv_nsec = %ld\n", to.tv_nsec); ++ ++ res = futex_wait_multiple(&fwb, 1, &to, ++ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); ++ ++#ifdef __ILP32__ ++ if (res == -1 && errno == ENOSYS) { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } else { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; + } ++#else ++ if (!res || errno != ETIMEDOUT) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h +index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 +--- a/tools/testing/selftests/futex/include/futextest.h ++++ b/tools/testing/selftests/futex/include/futextest.h +@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; + #ifndef FUTEX_CMP_REQUEUE_PI + #define FUTEX_CMP_REQUEUE_PI 12 + #endif ++#ifndef FUTEX_WAIT_MULTIPLE ++#define FUTEX_WAIT_MULTIPLE 13 ++struct futex_wait_block { ++ futex_t *uaddr; ++ futex_t val; ++ __u32 bitset; ++}; ++#endif + #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE + #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + } + ++/** ++ * futex_wait_multiple() - block on several futexes with optional timeout ++ * @fwb: wait block user space address ++ * @count: number of entities at fwb ++ * @timeout: absolute timeout ++ */ ++static inline int ++futex_wait_multiple(struct futex_wait_block *fwb, int count, ++ struct timespec *timeout, int opflags) ++{ ++ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, ++ opflags); ++} ++ + /** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks +diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +index 0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +@@ -12,6 +12,7 @@ + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) + { + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb = {&f1, f1+1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + +@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", ++ f1, &f1, f1+1); ++ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (!res || errno != EWOULDBLOCK) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; + } ++ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore +index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 +--- a/tools/testing/selftests/futex/functional/.gitignore ++++ b/tools/testing/selftests/futex/functional/.gitignore +@@ -5,3 +5,4 @@ futex_wait_private_mapped_file + futex_wait_timeout + futex_wait_uninitialized_heap + futex_wait_wouldblock ++futex_wait_multiple +diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile +index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 +--- a/tools/testing/selftests/futex/functional/Makefile ++++ b/tools/testing/selftests/futex/functional/Makefile +@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ +- futex_wait_private_mapped_file ++ futex_wait_private_mapped_file \ ++ futex_wait_multiple + + TEST_PROGS := run.sh + +diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +new file mode 100644 +index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d +--- /dev/null ++++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +@@ -0,0 +1,173 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/****************************************************************************** ++ * ++ * Copyright © Collabora, Ltd., 2019 ++ * ++ * DESCRIPTION ++ * Test basic semantics of FUTEX_WAIT_MULTIPLE ++ * ++ * AUTHOR ++ * Gabriel Krisman Bertazi ++ * ++ * HISTORY ++ * 2019-Dec-13: Initial version by Krisman ++ * ++ *****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "futextest.h" ++#include "logging.h" ++ ++#define TEST_NAME "futex-wait-multiple" ++#define timeout_ns 100000 ++#define MAX_COUNT 128 ++#define WAKE_WAIT_US 3000000 ++ ++int ret = RET_PASS; ++char *progname; ++futex_t f[MAX_COUNT] = {0}; ++struct futex_wait_block fwb[MAX_COUNT]; ++ ++void usage(char *prog) ++{ ++ printf("Usage: %s\n", prog); ++ printf(" -c Use color\n"); ++ printf(" -h Display this help message\n"); ++ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", ++ VQUIET, VCRITICAL, VINFO); ++} ++ ++void test_count_overflow(void) ++{ ++ futex_t f = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb[MAX_COUNT+1]; ++ int res, i; ++ ++ ksft_print_msg("%s: Test a too big number of futexes\n", progname); ++ ++ for (i = 0; i < MAX_COUNT+1; i++) { ++ fwb[i].uaddr = &f; ++ fwb[i].val = f; ++ fwb[i].bitset = 0; ++ } ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res != -1 || errno != EINVAL) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); ++ } ++ ++#endif /* __ILP32__ */ ++} ++ ++void *waiterfn(void *arg) ++{ ++ int res; ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res < 0) ++ ksft_print_msg("waiter failed %d\n", res); ++ ++ info("futex_wait_multiple: Got hint futex %d was freed\n", res); ++#endif /* __ILP32__ */ ++ ++ return NULL; ++} ++ ++void test_fwb_wakeup(void) ++{ ++ int res, i; ++ pthread_t waiter; ++ ++ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); ++ ++ for (i = 0; i < MAX_COUNT; i++) { ++ fwb[i].uaddr = &f[i]; ++ fwb[i].val = f[i]; ++ fwb[i].bitset = 0xffffffff; ++ } ++ ++ res = pthread_create(&waiter, NULL, waiterfn, NULL); ++ if (res) { ++ ksft_test_result_fail("Creating waiting thread failed"); ++ ksft_exit_fail(); ++ } ++ ++ usleep(WAKE_WAIT_US); ++ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); ++ if (res != 1) { ++ ksft_test_result_fail("Failed to wake thread res=%d\n", res); ++ ksft_exit_fail(); ++ } ++ ++ pthread_join(waiter, NULL); ++ ksft_test_result_pass("%s succeed\n", __func__); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int c; ++ ++ while ((c = getopt(argc, argv, "cht:v:")) != -1) { ++ switch (c) { ++ case 'c': ++ log_color(1); ++ break; ++ case 'h': ++ usage(basename(argv[0])); ++ exit(0); ++ case 'v': ++ log_verbosity(atoi(optarg)); ++ break; ++ default: ++ usage(basename(argv[0])); ++ exit(1); ++ } ++ } ++ ++ progname = basename(argv[0]); ++ ++ ksft_print_header(); ++ ksft_set_plan(2); ++ ++ test_count_overflow(); ++ ++#ifdef __ILP32__ ++ // if it's a 32x binary, there's no futex to wakeup ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++#else ++ test_fwb_wakeup(); ++#endif /* __ILP32__ */ ++ ++ ksft_print_cnts(); ++ return ret; ++} +diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh +index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 +--- a/tools/testing/selftests/futex/functional/run.sh ++++ b/tools/testing/selftests/futex/functional/run.sh +@@ -73,3 +73,6 @@ echo + echo + ./futex_wait_uninitialized_heap $COLOR + ./futex_wait_private_mapped_file $COLOR ++ ++echo ++./futex_wait_multiple $COLOR +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index 580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,7 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 +-#define FUTEX_WAIT_MULTIPLE 13 ++#define FUTEX_WAIT_MULTIPLE 31 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +diff --git a/kernel/futex.c b/kernel/futex.c +index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } +@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + */ + struct compat_futex_wait_block { + compat_uptr_t uaddr; ++ __u32 pad; + __u32 val; + __u32 bitset; + }; +@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, return -EINVAL; t = timespec64_to_ktime(ts); @@ -415,5 +906,3 @@ index 91f3db335c57..2623e8f152cd 100644 t = ktime_add_safe(ktime_get(), t); tp = &t; } --- -2.20.1