Logo Search packages:      
Sourcecode: fuse-umfuse-fat version File versions  Download package

Unicode support library


Detailed Description

The FreeDOS-32 Unicode support library provides facilities to manage UTF-8, UTF-16 and wide characters (format conversion, case folding).
This manual documents version 2.1 of the library.
The library is distributed under the terms of the GNU General Public License.

A UTF-8 character is converted to a wide character (UTF-32 or UCS-4) using the following rules (binary numbers):

UTF-32                     - UTF-8
00000000 00000000 0aaaaaaa - 0aaaaaaa
00000000 00000bbb bbaaaaaa - 110bbbbb 10aaaaaa
00000000 ccccbbbb bbaaaaaa - 1110cccc 10bbbbbb 10aaaaaa
000dddcc ccccbbbb bbaaaaaa - 11110ddd 10cccccc 10bbbbbb 10aaaaaa

A UTF-16 character is converted to a wide character (UTF-32 or UCS-4) using the following rules (binary numbers):

UTF-32                     - UTF-16
00000000 aaaaaaaa aaaaaaaa <-> aaaaaaaa aaaaaaaa
000bbbbb aaaaaaaa aaaaaaaa <-> 110110cc ccaaaaaa  110111aa aaaaaaaa
where cccc = bbbbb - 1.


Defines

#define unicode_utf16len   unicode_utf16le_len
#define unicode_utf16towc   unicode_utf16le_to_wchar
#define unicode_utf8len   unicode_utf8_len
#define unicode_utf8towc   unicode_utf8_to_wchar
#define unicode_wctoutf16   unicode_wchar_to_utf16le
#define unicode_wctoutf8   unicode_wchar_to_utf8

Functions

static uint16_t big_endian_16 (uint16_t v)
static uint16_t little_endian_16 (uint16_t v)
wchar_t unicode_simple_fold (wchar_t wc)
 Simple case folding of a wide character.
int unicode_utf16be_len (uint16_t lead_word)
 Gets the length of a UTF-16BE character.
int unicode_utf16be_to_wchar (wchar_t *restrict result, const uint16_t *restrict string, size_t size)
 UTF-16BE to wide character.
int unicode_utf16le_len (uint16_t lead_word)
 Gets the length of a UTF-16LE character.
int unicode_utf16le_to_wchar (wchar_t *restrict result, const uint16_t *restrict string, size_t size)
 UTF-16LE to wide character.
int unicode_utf8_len (char lead_byte)
 Gets the length of a UTF-8 character.
int unicode_utf8_to_wchar (wchar_t *restrict result, const char *restrict string, size_t size)
 UTF-8 to wide character.
int unicode_wchar_to_utf16be (uint16_t *s, wchar_t wc, size_t size)
 Wide character to UTF-16BE.
int unicode_wchar_to_utf16le (uint16_t *s, wchar_t wc, size_t size)
 Wide character to UTF-16LE.
int unicode_wchar_to_utf8 (char *s, wchar_t wc, size_t size)
 Wide character to UTF-8.

Variables

static const uint16_t page_00 [256]
static const uint16_t page_01 [256]
static const uint16_t page_02 [256]
static const uint16_t page_03 [256]
static const uint16_t page_04 [256]
static const uint16_t page_05 [256]
static const uint16_t page_10 [256]
static const uint16_t page_1E [256]
static const uint16_t page_1F [256]
static const uint16_t page_21 [256]
static const uint16_t page_24 [256]
static const uint16_t page_2C [256]
static const uint16_t page_FF [256]
static const uint16_t * pages [256]
struct {
   char   mask
   char   val
} t [4]


Generated by  Doxygen 1.6.0   Back to index