diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 3 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/nls_base.c | 131 |
3 files changed, 135 insertions, 0 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index e048aded8b..44f30d8242 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -148,4 +148,7 @@ config GENERIC_LIB_LSHRDI3 config GENERIC_LIB_MULDI3 bool +config NLS + bool "Native language support" + endmenu diff --git a/lib/Makefile b/lib/Makefile index e72aa6655c..763516d41a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -67,6 +67,7 @@ obj-y += parseopt.o obj-y += clz_ctz.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC8) += crc8.o +obj-$(CONFIG_NLS) += nls_base.o # GCC library routines obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o diff --git a/lib/nls_base.c b/lib/nls_base.c new file mode 100644 index 0000000000..cd6a5ff124 --- /dev/null +++ b/lib/nls_base.c @@ -0,0 +1,131 @@ +/* + * linux/fs/nls/nls_base.c + * + * Native language support--charsets and unicode translations. + * By Gordon Chaffee 1996, 1997 + * + * Unicode based case conversion 1999 by Wolfram Pienkoss + * + */ +#include <common.h> +#include <linux/nls.h> + +/* + * Sample implementation from Unicode home page. + * http://www.stonehand.com/unicode/standard/fss-utf.html + */ +struct utf8_table { + int cmask; + int cval; + int shift; + long lmask; + long lval; +}; + +static const struct utf8_table utf8_table[] = +{ + {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */}, + {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */}, + {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */}, + {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */}, + {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */}, + {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */}, + {0, /* end of table */} +}; + +#define UNICODE_MAX 0x0010ffff +#define PLANE_SIZE 0x00010000 + +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + +int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) +{ + unsigned long l; + int c0, c, nc; + const struct utf8_table *t; + + nc = 0; + c0 = *s; + l = c0; + for (t = utf8_table; t->cmask; t++) { + nc++; + if ((c0 & t->cmask) == t->cval) { + l &= t->lmask; + if (l < t->lval || l > UNICODE_MAX || + (l & SURROGATE_MASK) == SURROGATE_PAIR) + return -1; + *pu = (unicode_t) l; + return nc; + } + if (inlen <= nc) + return -1; + s++; + c = (*s ^ 0x80) & 0xFF; + if (c & 0xC0) + return -1; + l = (l << 6) | c; + } + return -1; +} +EXPORT_SYMBOL(utf8_to_utf32); + +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, + wchar_t *pwcs, int maxout) +{ + u16 *op; + int size; + unicode_t u; + + op = pwcs; + while (inlen > 0 && maxout > 0 && *s) { + if (*s & 0x80) { + size = utf8_to_utf32(s, inlen, &u); + if (size < 0) + return -EINVAL; + s += size; + inlen -= size; + + if (u >= PLANE_SIZE) { + if (maxout < 2) + break; + u -= PLANE_SIZE; + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | + SURROGATE_LOW | + (u & SURROGATE_BITS), + endian); + maxout -= 2; + } else { + put_utf16(op++, u, endian); + maxout--; + } + } else { + put_utf16(op++, *s++, endian); + inlen--; + maxout--; + } + } + return op - pwcs; +} +EXPORT_SYMBOL(utf8s_to_utf16s); + |