fs/cifs/cifs_unicode.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * cifs_unicode:  Unicode kernel case support
 *
 * Function:
 *     Convert a unicode character to upper or lower case using
 *     compressed tables.
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *
 * Notes:
 *     These APIs are based on the C library functions.  The semantics
 *     should match the C functions but with expanded size operands.
 *
 *     The upper/lower functions are based on a table created by mkupr.
 *     This is a compressed table of upper and lower case conversion.
 */
#ifndef _CIFS_UNICODE_H
#define _CIFS_UNICODE_H

#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/nls.h>

#define  UNIUPR_NOLOWER		/* Example to not expand lower case tables */

/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS
 */
#define UNI_ASTERISK    (__u16) ('*' + 0xF000)
#define UNI_QUESTION    (__u16) ('?' + 0xF000)
#define UNI_COLON       (__u16) (':' + 0xF000)
#define UNI_GRTRTHAN    (__u16) ('>' + 0xF000)
#define UNI_LESSTHAN    (__u16) ('<' + 0xF000)
#define UNI_PIPE        (__u16) ('|' + 0xF000)
#define UNI_SLASH       (__u16) ('\\' + 0xF000)

/*
 * Macs use an older "SFM" mapping of the symbols above. Fortunately it does
 * not conflict (although almost does) with the mapping above.
 */

#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
#define SFM_ASTERISK    ((__u16) 0xF021)
#define SFM_QUESTION    ((__u16) 0xF025)
#define SFM_COLON       ((__u16) 0xF022)
#define SFM_GRTRTHAN    ((__u16) 0xF024)
#define SFM_LESSTHAN    ((__u16) 0xF023)
#define SFM_PIPE        ((__u16) 0xF027)
#define SFM_SLASH       ((__u16) 0xF026)
#define SFM_SPACE	((__u16) 0xF028)
#define SFM_PERIOD	((__u16) 0xF029)

/*
 * Mapping mechanism to use when one of the seven reserved characters is
 * encountered.  We can only map using one of the mechanisms at a time
 * since otherwise readdir could return directory entries which we would
 * not be able to open
 *
 * NO_MAP_UNI_RSVD  = do not perform any remapping of the character
 * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible)
 * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option)
 *
 */
#define NO_MAP_UNI_RSVD		0
#define SFM_MAP_UNI_RSVD	1
#define SFU_MAP_UNI_RSVD	2

/* Just define what we want from uniupr.h.  We don't want to define the tables
 * in each source file.
 */
#ifndef	UNICASERANGE_DEFINED
struct UniCaseRange {
	wchar_t start;
	wchar_t end;
	signed char *table;
};
#endif				/* UNICASERANGE_DEFINED */

#ifndef UNIUPR_NOUPPER
extern signed char CifsUniUpperTable[512];
extern const struct UniCaseRange CifsUniUpperRange[];
#endif				/* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
extern signed char CifsUniLowerTable[512];
extern const struct UniCaseRange CifsUniLowerRange[];
#endif				/* UNIUPR_NOLOWER */

#ifdef __KERNEL__
int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		    const struct nls_table *cp, int map_type);
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
		     const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
			      const bool is_unicode,
			      const struct nls_table *codepage);
extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
			      const struct nls_table *cp, int mapChars);
extern int cifs_remap(struct cifs_sb_info *cifs_sb);
extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
				     int *utf16_len, const struct nls_table *cp,
				     int remap);
#endif

wchar_t cifs_toupper(wchar_t in);

/*
 * UniStrcat:  Concatenate the second string to the first
 *
 * Returns:
 *     Address of the first string
 */
static inline __le16 *
UniStrcat(__le16 *ucs1, const __le16 *ucs2)
{
	__le16 *anchor = ucs1;	/* save a pointer to start of ucs1 */

	while (*ucs1++) ;	/* To end of first string */
	ucs1--;			/* Return to the null */
	while ((*ucs1++ = *ucs2++)) ;	/* copy string 2 over */
	return anchor;
}

/*
 * UniStrchr:  Find a character in a string
 *
 * Returns:
 *     Address of first occurrence of character in string
 *     or NULL if the character is not in the string
 */
static inline wchar_t *
UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	while ((*ucs != uc) && *ucs)
		ucs++;

	if (*ucs == uc)
		return (wchar_t *) ucs;
	return NULL;
}

/*
 * UniStrcmp:  Compare two strings
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int
UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	while ((*ucs1 == *ucs2) && *ucs1) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) *ucs2;
}

/*
 * UniStrcpy:  Copy a string
 */
static inline wchar_t *
UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *anchor = ucs1;	/* save the start of result string */

	while ((*ucs1++ = *ucs2++)) ;
	return anchor;
}

/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 */
static inline size_t
UniStrlen(const wchar_t *ucs1)
{
	int i = 0;

	while (*ucs1++)
		i++;
	return i;
}

/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 */
static inline size_t
UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	int i = 0;

	while (*ucs1++) {
		i++;
		if (i >= maxlen)
			break;
	}
	return i;
}

/*
 * UniStrncat:  Concatenate length limited string
 */
static inline wchar_t *
UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *anchor = ucs1;	/* save pointer to string 1 */

	while (*ucs1++) ;
	ucs1--;			/* point to null terminator of s1 */
	while (n-- && (*ucs1 = *ucs2)) {	/* copy s2 after s1 */
		ucs1++;
		ucs2++;
	}
	*ucs1 = 0;		/* Null terminate the result */
	return (anchor);
}

/*
 * UniStrncmp:  Compare length limited string
 */
static inline int
UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (!n)
		return 0;	/* Null strings are equal */
	while ((*ucs1 == *ucs2) && *ucs1 && --n) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) *ucs2;
}

/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (!n)
		return 0;	/* Null strings are equal */
	while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
}

/*
 * UniStrncpy:  Copy length limited string with pad
 */
static inline wchar_t *
UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *anchor = ucs1;

	while (n-- && *ucs2)	/* Copy the strings */
		*ucs1++ = *ucs2++;

	n++;
	while (n--)		/* Pad with nulls */
		*ucs1++ = 0;
	return anchor;
}

/*
 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 */
static inline wchar_t *
UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *anchor = ucs1;

	while (n-- && *ucs2)	/* Copy the strings */
		*ucs1++ = __le16_to_cpu(*ucs2++);

	n++;
	while (n--)		/* Pad with nulls */
		*ucs1++ = 0;
	return anchor;
}

/*
 * UniStrstr:  Find a string in a string
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *
UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *anchor1 = ucs1;
	const wchar_t *anchor2 = ucs2;

	while (*ucs1) {
		if (*ucs1 == *ucs2) {
			/* Partial match found */
			ucs1++;
			ucs2++;
		} else {
			if (!*ucs2)	/* Match found */
				return (wchar_t *) anchor1;
			ucs1 = ++anchor1;	/* No match */
			ucs2 = anchor2;
		}
	}

	if (!*ucs2)		/* Both end together */
		return (wchar_t *) anchor1;	/* Match found */
	return NULL;		/* No match */
}

#ifndef UNIUPR_NOUPPER
/*
 * UniToupper:  Convert a unicode character to upper case
 */
static inline wchar_t
UniToupper(register wchar_t uc)
{
	register const struct UniCaseRange *rp;

	if (uc < sizeof(CifsUniUpperTable)) {
		/* Latin characters */
		return uc + CifsUniUpperTable[uc];	/* Use base tables */
	} else {
		rp = CifsUniUpperRange;	/* Use range tables */
		while (rp->start) {
			if (uc < rp->start)	/* Before start of range */
				return uc;	/* Uppercase = input */
			if (uc <= rp->end)	/* In range */
				return uc + rp->table[uc - rp->start];
			rp++;	/* Try next range */
		}
	}
	return uc;		/* Past last range */
}

/*
 * UniStrupr:  Upper case a unicode string
 */
static inline __le16 *
UniStrupr(register __le16 *upin)
{
	register __le16 *up;

	up = upin;
	while (*up) {		/* For all characters */
		*up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
		up++;
	}
	return upin;		/* Return input pointer */
}
#endif				/* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
/*
 * UniTolower:  Convert a unicode character to lower case
 */
static inline wchar_t
UniTolower(register wchar_t uc)
{
	register const struct UniCaseRange *rp;

	if (uc < sizeof(CifsUniLowerTable)) {
		/* Latin characters */
		return uc + CifsUniLowerTable[uc];	/* Use base tables */
	} else {
		rp = CifsUniLowerRange;	/* Use range tables */
		while (rp->start) {
			if (uc < rp->start)	/* Before start of range */
				return uc;	/* Uppercase = input */
			if (uc <= rp->end)	/* In range */
				return uc + rp->table[uc - rp->start];
			rp++;	/* Try next range */
		}
	}
	return uc;		/* Past last range */
}

/*
 * UniStrlwr:  Lower case a unicode string
 */
static inline wchar_t *
UniStrlwr(register wchar_t *upin)
{
	register wchar_t *up;

	up = upin;
	while (*up) {		/* For all characters */
		*up = UniTolower(*up);
		up++;
	}
	return upin;		/* Return input pointer */
}

#endif

#endif /* _CIFS_UNICODE_H */