kernel/fs/hfsplus/unicode.c

521 lines
10 KiB
C
Raw Normal View History

2024-07-22 17:22:30 +08:00
// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/hfsplus/unicode.c
*
* Copyright (C) 2001
* Brad Boyer (flar@allandria.com)
* (C) 2003 Ardis Technologies <roman@ardistech.com>
*
* Handler routines for unicode strings
*/
#include <linux/types.h>
#include <linux/nls.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
/* Fold the case of a unicode char, given the 16 bit value */
/* Returns folded char, or 0 if ignorable */
static inline u16 case_fold(u16 c)
{
u16 tmp;
tmp = hfsplus_case_fold_table[c >> 8];
if (tmp)
tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
else
tmp = c;
return tmp;
}
/* Compare unicode strings, return values like normal strcmp */
int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
const struct hfsplus_unistr *s2)
{
u16 len1, len2, c1, c2;
const hfsplus_unichr *p1, *p2;
len1 = be16_to_cpu(s1->length);
len2 = be16_to_cpu(s2->length);
p1 = s1->unicode;
p2 = s2->unicode;
while (1) {
c1 = c2 = 0;
while (len1 && !c1) {
c1 = case_fold(be16_to_cpu(*p1));
p1++;
len1--;
}
while (len2 && !c2) {
c2 = case_fold(be16_to_cpu(*p2));
p2++;
len2--;
}
if (c1 != c2)
return (c1 < c2) ? -1 : 1;
if (!c1 && !c2)
return 0;
}
}
/* Compare names as a sequence of 16-bit unsigned integers */
int hfsplus_strcmp(const struct hfsplus_unistr *s1,
const struct hfsplus_unistr *s2)
{
u16 len1, len2, c1, c2;
const hfsplus_unichr *p1, *p2;
int len;
len1 = be16_to_cpu(s1->length);
len2 = be16_to_cpu(s2->length);
p1 = s1->unicode;
p2 = s2->unicode;
for (len = min(len1, len2); len > 0; len--) {
c1 = be16_to_cpu(*p1);
c2 = be16_to_cpu(*p2);
if (c1 != c2)
return c1 < c2 ? -1 : 1;
p1++;
p2++;
}
return len1 < len2 ? -1 :
len1 > len2 ? 1 : 0;
}
#define Hangul_SBase 0xac00
#define Hangul_LBase 0x1100
#define Hangul_VBase 0x1161
#define Hangul_TBase 0x11a7
#define Hangul_SCount 11172
#define Hangul_LCount 19
#define Hangul_VCount 21
#define Hangul_TCount 28
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
int i, s, e;
s = 1;
e = p[1];
if (!e || cc < p[s * 2] || cc > p[e * 2])
return NULL;
do {
i = (s + e) / 2;
if (cc > p[i * 2])
s = i + 1;
else if (cc < p[i * 2])
e = i - 1;
else
return hfsplus_compose_table + p[i * 2 + 1];
} while (s <= e);
return NULL;
}
int hfsplus_uni2asc(struct super_block *sb,
const struct hfsplus_unistr *ustr,
char *astr, int *len_p)
{
const hfsplus_unichr *ip;
struct nls_table *nls = HFSPLUS_SB(sb)->nls;
u8 *op;
u16 cc, c0, c1;
u16 *ce1, *ce2;
int i, len, ustrlen, res, compose;
op = astr;
ip = ustr->unicode;
ustrlen = be16_to_cpu(ustr->length);
len = *len_p;
ce1 = NULL;
compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
while (ustrlen > 0) {
c0 = be16_to_cpu(*ip++);
ustrlen--;
/* search for single decomposed char */
if (likely(compose))
ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
if (ce1)
cc = ce1[0];
else
cc = 0;
if (cc) {
/* start of a possibly decomposed Hangul char */
if (cc != 0xffff)
goto done;
if (!ustrlen)
goto same;
c1 = be16_to_cpu(*ip) - Hangul_VBase;
if (c1 < Hangul_VCount) {
/* compose the Hangul char */
cc = (c0 - Hangul_LBase) * Hangul_VCount;
cc = (cc + c1) * Hangul_TCount;
cc += Hangul_SBase;
ip++;
ustrlen--;
if (!ustrlen)
goto done;
c1 = be16_to_cpu(*ip) - Hangul_TBase;
if (c1 > 0 && c1 < Hangul_TCount) {
cc += c1;
ip++;
ustrlen--;
}
goto done;
}
}
while (1) {
/* main loop for common case of not composed chars */
if (!ustrlen)
goto same;
c1 = be16_to_cpu(*ip);
if (likely(compose))
ce1 = hfsplus_compose_lookup(
hfsplus_compose_table, c1);
if (ce1)
break;
switch (c0) {
case 0:
c0 = 0x2400;
break;
case '/':
c0 = ':';
break;
}
res = nls->uni2char(c0, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
*op = '?';
res = 1;
}
op += res;
len -= res;
c0 = c1;
ip++;
ustrlen--;
}
ce2 = hfsplus_compose_lookup(ce1, c0);
if (ce2) {
i = 1;
while (i < ustrlen) {
ce1 = hfsplus_compose_lookup(ce2,
be16_to_cpu(ip[i]));
if (!ce1)
break;
i++;
ce2 = ce1;
}
cc = ce2[0];
if (cc) {
ip += i;
ustrlen -= i;
goto done;
}
}
same:
switch (c0) {
case 0:
cc = 0x2400;
break;
case '/':
cc = ':';
break;
default:
cc = c0;
}
done:
res = nls->uni2char(cc, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
*op = '?';
res = 1;
}
op += res;
len -= res;
}
res = 0;
out:
*len_p = (char *)op - astr;
return res;
}
/*
* Convert one or more ASCII characters into a single unicode character.
* Returns the number of ASCII characters corresponding to the unicode char.
*/
static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
wchar_t *uc)
{
int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
if (size <= 0) {
*uc = '?';
size = 1;
}
switch (*uc) {
case 0x2400:
*uc = 0;
break;
case ':':
*uc = '/';
break;
}
return size;
}
/* Decomposes a non-Hangul unicode character. */
static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
{
int off;
off = hfsplus_decompose_table[(uc >> 12) & 0xf];
if (off == 0 || off == 0xffff)
return NULL;
off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
if (!off)
return NULL;
off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
if (!off)
return NULL;
off = hfsplus_decompose_table[off + (uc & 0xf)];
*size = off & 3;
if (*size == 0)
return NULL;
return hfsplus_decompose_table + (off / 4);
}
/*
* Try to decompose a unicode character as Hangul. Return 0 if @uc is not
* precomposed Hangul, otherwise return the length of the decomposition.
*
* This function was adapted from sample code from the Unicode Standard
* Annex #15: Unicode Normalization Forms, version 3.2.0.
*
* Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
* under the Terms of Use in http://www.unicode.org/copyright.html.
*/
static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
{
int index;
int l, v, t;
index = uc - Hangul_SBase;
if (index < 0 || index >= Hangul_SCount)
return 0;
l = Hangul_LBase + index / Hangul_NCount;
v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
t = Hangul_TBase + index % Hangul_TCount;
result[0] = l;
result[1] = v;
if (t != Hangul_TBase) {
result[2] = t;
return 3;
}
return 2;
}
/* Decomposes a single unicode character. */
static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
{
u16 *result;
/* Hangul is handled separately */
result = hangul_buffer;
*size = hfsplus_try_decompose_hangul(uc, result);
if (*size == 0)
result = hfsplus_decompose_nonhangul(uc, size);
return result;
}
int hfsplus_asc2uni(struct super_block *sb,
struct hfsplus_unistr *ustr, int max_unistr_len,
const char *astr, int len)
{
int size, dsize, decompose;
u16 *dstr, outlen = 0;
wchar_t c;
u16 dhangul[3];
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
while (outlen < max_unistr_len && len > 0) {
size = asc2unichar(sb, astr, len, &c);
if (decompose)
dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
if (outlen + dsize > max_unistr_len)
break;
do {
ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
} while (--dsize > 0);
} else
ustr->unicode[outlen++] = cpu_to_be16(c);
astr += size;
len -= size;
}
ustr->length = cpu_to_be16(outlen);
if (len > 0)
return -ENAMETOOLONG;
return 0;
}
/*
* Hash a string to an integer as appropriate for the HFS+ filesystem.
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
{
struct super_block *sb = dentry->d_sb;
const char *astr;
const u16 *dstr;
int casefold, decompose, size, len;
unsigned long hash;
wchar_t c;
u16 c2;
u16 dhangul[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
hash = init_name_hash(dentry);
astr = str->name;
len = str->len;
while (len > 0) {
int dsize;
size = asc2unichar(sb, astr, len, &c);
astr += size;
len -= size;
if (decompose)
dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
do {
c2 = *dstr++;
if (casefold)
c2 = case_fold(c2);
if (!casefold || c2)
hash = partial_name_hash(c2, hash);
} while (--dsize > 0);
} else {
c2 = c;
if (casefold)
c2 = case_fold(c2);
if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
str->hash = end_name_hash(hash);
return 0;
}
/*
* Compare strings with HFS+ filename ordering.
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
int hfsplus_compare_dentry(const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct super_block *sb = dentry->d_sb;
int casefold, decompose, size;
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
const char *astr1, *astr2;
u16 c1, c2;
wchar_t c;
u16 dhangul_1[3], dhangul_2[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
astr1 = str;
len1 = len;
astr2 = name->name;
len2 = name->len;
dsize1 = dsize2 = 0;
dstr1 = dstr2 = NULL;
while (len1 > 0 && len2 > 0) {
if (!dsize1) {
size = asc2unichar(sb, astr1, len1, &c);
astr1 += size;
len1 -= size;
if (decompose)
dstr1 = decompose_unichar(c, &dsize1,
dhangul_1);
if (!decompose || !dstr1) {
c1 = c;
dstr1 = &c1;
dsize1 = 1;
}
}
if (!dsize2) {
size = asc2unichar(sb, astr2, len2, &c);
astr2 += size;
len2 -= size;
if (decompose)
dstr2 = decompose_unichar(c, &dsize2,
dhangul_2);
if (!decompose || !dstr2) {
c2 = c;
dstr2 = &c2;
dsize2 = 1;
}
}
c1 = *dstr1;
c2 = *dstr2;
if (casefold) {
c1 = case_fold(c1);
if (!c1) {
dstr1++;
dsize1--;
continue;
}
c2 = case_fold(c2);
if (!c2) {
dstr2++;
dsize2--;
continue;
}
}
if (c1 < c2)
return -1;
else if (c1 > c2)
return 1;
dstr1++;
dsize1--;
dstr2++;
dsize2--;
}
if (len1 < len2)
return -1;
if (len1 > len2)
return 1;
return 0;
}