summary | refs | log | tree | commit | diff
path: root/fs/unicode
diff options
context:
space:
mode:
author: Gabriel Krisman Bertazi <krisman@collabora.com> 2019-06-19 23:45:09 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2019-06-19 23:45:09 -0400
commit: 3ae72562ad917df36a1b1247d749240e3b4865db (patch)
tree: c458ce5abc17452b19e2ecc06ce7d46841d5ec63 /fs/unicode
parent: b03755ad6f33b7b8cd7312a3596a2dbf496de6e7 (diff)
download: lwn-3ae72562ad917df36a1b1247d749240e3b4865db.tar.gz
download: lwn-3ae72562ad917df36a1b1247d749240e3b4865db.zip
ext4: optimize case-insensitive lookups
Temporarily cache a casefolded version of the file name under lookup in ext4_filename, to avoid repeatedly casefolding it. I got up to 30% speedup on lookups of large directories (>100k entries), depending on the length of the string under lookup. Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/unicode')
-rw-r--r-- fs/unicode/utf8-core.c 28
1 files changed, 28 insertions, 0 deletions
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..71ca4d047d65 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
}
EXPORT_SYMBOL(utf8_strncasecmp);
+/* Compare s1 with cf, an already casefolded valid UTF-8 string of cf->len
+ * bytes. Returns 0 on match, 1 on mismatch, -EINVAL if s1 cannot be folded.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+			    const struct qstr *cf,
+			    const struct qstr *s1)
+{
+	const struct utf8data *data = utf8nfdicf(um->version);
+	struct utf8cursor cur1;
+	unsigned int i = 0;
+	int c1, c2;
+
+	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+		return -EINVAL;
+
+	do {
+		c1 = utf8byte(&cur1);
+		if (c1 < 0)
+			return -EINVAL;
+		c2 = i < cf->len ? cf->name[i++] : 0; /* cf may lack a NUL */
+		if (c1 != c2)
+			return 1;
+	} while (c1);
+
+	return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{