Merge tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull non-MM updates from Andrew Morton: "Many singleton patches - please see the various changelogs for details. Quite a lot of nilfs2 work this time around. Notable patch series in this pull request are: - "mul_u64_u64_div_u64: new implementation" by Nicolas Pitre, with assistance from Uwe Kleine-König. Reimplement mul_u64_u64_div_u64() to provide (much) more accurate results. The current implementation was causing Uwe some issues in the PWM drivers. - "xz: Updates to license, filters, and compression options" from Lasse Collin. Miscellaneous maintenance and kinor feature work to the xz decompressor. - "Fix some GDB command error and add some GDB commands" from Kuan-Ying Lee. Fixes and enhancements to the gdb scripts. - "treewide: add missing MODULE_DESCRIPTION() macros" from Jeff Johnson. Adds lots of MODULE_DESCRIPTIONs, thus fixing lots of warnings about this. - "nilfs2: add support for some common ioctls" from Ryusuke Konishi. Adds various commonly-available ioctls to nilfs2. - "This series fixes a number of formatting issues in kernel doc comments" from Ryusuke Konishi does that. - "nilfs2: prevent unexpected ENOENT propagation" from Ryusuke Konishi. Fix issues where -ENOENT was being unintentionally and inappropriately returned to userspace. - "nilfs2: assorted cleanups" from Huang Xiaojia. - "nilfs2: fix potential issues with empty b-tree nodes" from Ryusuke Konishi fixes some issues which can occur on corrupted nilfs2 filesystems. - "scripts/decode_stacktrace.sh: improve error reporting and usability" from Luca Ceresoli does those things" * tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (103 commits) list: test: increase coverage of list_test_list_replace*() list: test: fix tests for list_cut_position() proc: use __auto_type more treewide: correct the typo 'retun' ocfs2: cleanup return value and mlog in ocfs2_global_read_info() nilfs2: remove duplicate 'unlikely()' usage nilfs2: fix potential oob read in nilfs_btree_check_delete() nilfs2: determine empty node blocks as corrupted nilfs2: fix potential null-ptr-deref in nilfs_btree_insert() user_namespace: use kmemdup_array() instead of kmemdup() for multiple allocation tools/mm: rm thp_swap_allocator_test when make clean squashfs: fix percpu address space issues in decompressor_multi_percpu.c lib: glob.c: added null check for character class nilfs2: refactor nilfs_segctor_thread() nilfs2: use kthread_create and kthread_stop for the log writer thread nilfs2: remove sc_timer_task nilfs2: do not repair reserved inode bitmap in nilfs_new_inode() nilfs2: eliminate the shared counter and spinlock for i_generation nilfs2: separate inode type information from i_state field nilfs2: use the BITS_PER_LONG macro ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-09-21 08:20:50 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-09-21 08:20:50 -0700
commit: 7856a565416e0cf091f825b0e25c7a1b7abb650e (patch)
tree: 0a04a0594167fc997b3b1299610b5ef95ab89f19 /lib/math
parent: 617a814f14b8914271f7a70366d72c6196d17663 (diff)
parent: 5e06e08939df1cafef97a8e04f4b88c2806b538a (diff)
download: lwn-7856a565416e0cf091f825b0e25c7a1b7abb650e.tar.gz
lwn-7856a565416e0cf091f825b0e25c7a1b7abb650e.zip
3 files changed, 172 insertions, 43 deletions
diff --git a/lib/math/Makefile b/lib/math/Makefile
index 3c1f92a7459d..3ef11305f8d2 100644
--- a/lib/math/Makefile
+++ b/lib/math/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_RATIONAL)		+= rational.o
 
 obj-$(CONFIG_INT_POW_TEST)  += tests/int_pow_kunit.o
 obj-$(CONFIG_TEST_DIV64)	+= test_div64.o
+obj-$(CONFIG_TEST_MULDIV64)	+= test_mul_u64_u64_div_u64.o
 obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 191761b1b623..5faa29208bdb 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -186,55 +186,84 @@ EXPORT_SYMBOL(iter_div_u64_rem);
 #ifndef mul_u64_u64_div_u64
 u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 {
-	u64 res = 0, div, rem;
-	int shift;
+	if (ilog2(a) + ilog2(b) <= 62)
+		return div64_u64(a * b, c);
 
-	/* can a * b overflow ? */
-	if (ilog2(a) + ilog2(b) > 62) {
-		/*
-		 * Note that the algorithm after the if block below might lose
-		 * some precision and the result is more exact for b > a. So
-		 * exchange a and b if a is bigger than b.
-		 *
-		 * For example with a = 43980465100800, b = 100000000, c = 1000000000
-		 * the below calculation doesn't modify b at all because div == 0
-		 * and then shift becomes 45 + 26 - 62 = 9 and so the result
-		 * becomes 4398035251080. However with a and b swapped the exact
-		 * result is calculated (i.e. 4398046510080).
-		 */
-		if (a > b)
-			swap(a, b);
+#if defined(__SIZEOF_INT128__)
+
+	/* native 64x64=128 bits multiplication */
+	u128 prod = (u128)a * b;
+	u64 n_lo = prod, n_hi = prod >> 64;
+
+#else
+
+	/* perform a 64x64=128 bits multiplication manually */
+	u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
+	u64 x, y, z;
+
+	x = (u64)a_lo * b_lo;
+	y = (u64)a_lo * b_hi + (u32)(x >> 32);
+	z = (u64)a_hi * b_hi + (u32)(y >> 32);
+	y = (u64)a_hi * b_lo + (u32)y;
+	z += (u32)(y >> 32);
+	x = (y << 32) + (u32)x;
+
+	u64 n_lo = x, n_hi = z;
+
+#endif
+
+	/* make sure c is not zero, trigger exception otherwise */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdiv-by-zero"
+	if (unlikely(c == 0))
+		return 1/0;
+#pragma GCC diagnostic pop
+
+	int shift = __builtin_ctzll(c);
 
+	/* try reducing the fraction in case the dividend becomes <= 64 bits */
+	if ((n_hi >> shift) == 0) {
+		u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo;
+
+		return div64_u64(n, c >> shift);
 		/*
-		 * (b * a) / c is equal to
-		 *
-		 *      (b / c) * a +
-		 *      (b % c) * a / c
-		 *
-		 * if nothing overflows. Can the 1st multiplication
-		 * overflow? Yes, but we do not care: this can only
-		 * happen if the end result can't fit in u64 anyway.
-		 *
-		 * So the code below does
-		 *
-		 *      res = (b / c) * a;
-		 *      b = b % c;
+		 * The remainder value if needed would be:
+		 *   res = div64_u64_rem(n, c >> shift, &rem);
+		 *   rem = (rem << shift) + (n_lo - (n << shift));
 		 */
-		div = div64_u64_rem(b, c, &rem);
-		res = div * a;
-		b = rem;
-
-		shift = ilog2(a) + ilog2(b) - 62;
-		if (shift > 0) {
-			/* drop precision */
-			b >>= shift;
-			c >>= shift;
-			if (!c)
-				return res;
-		}
 	}
 
-	return res + div64_u64(a * b, c);
+	if (n_hi >= c) {
+		/* overflow: result is unrepresentable in a u64 */
+		return -1;
+	}
+
+	/* Do the full 128 by 64 bits division */
+
+	shift = __builtin_clzll(c);
+	c <<= shift;
+
+	int p = 64 + shift;
+	u64 res = 0;
+	bool carry;
+
+	do {
+		carry = n_hi >> 63;
+		shift = carry ? 1 : __builtin_clzll(n_hi);
+		if (p < shift)
+			break;
+		p -= shift;
+		n_hi <<= shift;
+		n_hi |= n_lo >> (64 - shift);
+		n_lo <<= shift;
+		if (carry || (n_hi >= c)) {
+			n_hi -= c;
+			res |= 1ULL << p;
+		}
+	} while (n_hi);
+	/* The remainder value if needed would be n_hi << p */
+
+	return res;
 }
 EXPORT_SYMBOL(mul_u64_u64_div_u64);
 #endif
diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c
new file mode 100644
index 000000000000..58d058de4e73
--- /dev/null
+++ b/lib/math/test_mul_u64_u64_div_u64.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 BayLibre SAS
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/math64.h>
+
+typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params;
+
+static test_params test_values[] = {
+/* this contains many edge values followed by a couple random values */
+{                0xb,                0x7,                0x3,               0x19 },
+{         0xffff0000,         0xffff0000,                0xf, 0x1110eeef00000000 },
+{         0xffffffff,         0xffffffff,                0x1, 0xfffffffe00000001 },
+{         0xffffffff,         0xffffffff,                0x2, 0x7fffffff00000000 },
+{        0x1ffffffff,         0xffffffff,                0x2, 0xfffffffe80000000 },
+{        0x1ffffffff,         0xffffffff,                0x3, 0xaaaaaaa9aaaaaaab },
+{        0x1ffffffff,        0x1ffffffff,                0x4, 0xffffffff00000000 },
+{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff },
+{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 },
+{ 0x7fffffffffffffff,                0x2,                0x3, 0x5555555555555554 },
+{ 0xffffffffffffffff,                0x2, 0x8000000000000000,                0x3 },
+{ 0xffffffffffffffff,                0x2, 0xc000000000000000,                0x2 },
+{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 },
+{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 },
+{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 },
+{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b },
+{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 },
+{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe },
+{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 },
+{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 },
+{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f },
+{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 },
+{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 },
+};
+
+/*
+ * The above table can be verified with the following shell script:
+ *
+ * #!/bin/sh
+ * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \
+ *     lib/math/test_mul_u64_u64_div_u64.c |
+ * while read a b c r; do
+ *   expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc )
+ *   given=$( printf "%X\n" $r )
+ *   if [ "$expected" = "$given" ]; then
+ *     echo "$a * $b / $c = $r OK"
+ *   else
+ *     echo "$a * $b / $c = $r is wrong" >&2
+ *     echo "should be equivalent to 0x$expected" >&2
+ *     exit 1
+ *   fi
+ * done
+ */
+
+static int __init test_init(void)
+{
+	int i;
+
+	pr_info("Starting mul_u64_u64_div_u64() test\n");
+
+	for (i = 0; i < ARRAY_SIZE(test_values); i++) {
+		u64 a = test_values[i].a;
+		u64 b = test_values[i].b;
+		u64 c = test_values[i].c;
+		u64 expected_result = test_values[i].result;
+		u64 result = mul_u64_u64_div_u64(a, b, c);
+
+		if (result != expected_result) {
+			pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c);
+			pr_err("ERROR: expected result: %016llx\n", expected_result);
+			pr_err("ERROR: obtained result: %016llx\n", result);
+		}
+	}
+
+	pr_info("Completed mul_u64_u64_div_u64() test\n");
+	return 0;
+}
+
+static void __exit test_exit(void)
+{
+}
+
+module_init(test_init);
+module_exit(test_exit);
+
+MODULE_AUTHOR("Nicolas Pitre");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("mul_u64_u64_div_u64() test module");
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-21 08:20:50 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-21 08:20:50 -0700
commit	7856a565416e0cf091f825b0e25c7a1b7abb650e (patch)
tree	0a04a0594167fc997b3b1299610b5ef95ab89f19 /lib/math
parent	617a814f14b8914271f7a70366d72c6196d17663 (diff)
parent	5e06e08939df1cafef97a8e04f4b88c2806b538a (diff)
download	lwn-7856a565416e0cf091f825b0e25c7a1b7abb650e.tar.gz lwn-7856a565416e0cf091f825b0e25c7a1b7abb650e.zip