UTF and Unicode-related functions.
More...
|
u32 | gf_utf8_wcstombs (char *dst, size_t dst_len, const unsigned short **srcp) |
| wide-char to multibyte conversion More...
|
|
u32 | gf_utf8_mbstowcs (unsigned short *dst, size_t dst_len, const char **srcp) |
| multibyte to wide-char conversion More...
|
|
u32 | gf_utf8_wcslen (const unsigned short *s) |
| wide-char string length More...
|
|
GF_Err | gf_utf_get_string_from_bom (const u8 *data, u32 size, char **out_ptr, char **result, u32 *res_size) |
| returns a string from a string started with BOM More...
|
|
Bool | gf_utf8_is_legal (const u8 *data, u32 size) |
| Checks validity of a UTF8 string. More...
|
|
Bool | gf_utf8_reorder_bidi (u16 *utf_string, u32 len) |
| string bidi reordering More...
|
|
u32 | utf8_to_ucs4 (u32 *ucs4_buf, u32 utf8_len, unsigned char *utf8_buf) |
| Unicode conversion from UTF-8 to UCS-4. More...
|
|
This section documents the UTF functions of the GPAC framework.
The wide characters in GPAC are unsigned shorts; in other words, GPAC only supports the UTF-8 and UTF-16 encodings.
- Note
- these functions are just ports of libutf8 library tools into GPAC.
◆ GF_UTF8_FAIL
#define GF_UTF8_FAIL 0xFFFFFFFF |
error code for UTF-8 conversion errors
◆ gf_utf8_wcstombs()
u32 gf_utf8_wcstombs |
( |
char * |
dst, |
|
|
size_t |
dst_len, |
|
|
const unsigned short ** |
srcp |
|
) |
| |
Converts a wide-char string to a multibyte string
- Parameters
-
dst | multibyte destination buffer |
dst_len | multibyte destination buffer size |
srcp | address of the wide-char string. This will be set to the next char to be converted in the input buffer if not enough space in the destination, or NULL if conversion was completed. |
- Returns
- length (in bytes) of the multibyte string or GF_UTF8_FAIL if error.
◆ gf_utf8_mbstowcs()
u32 gf_utf8_mbstowcs |
( |
unsigned short * |
dst, |
|
|
size_t |
dst_len, |
|
|
const char ** |
srcp |
|
) |
| |
Converts a multibyte string to a wide-char string
- Parameters
-
dst | wide-char destination buffer |
dst_len | wide-char destination buffer size |
srcp | address of the multibyte character buffer. This will be set to the next char to be converted in the input buffer if not enough space in the destination, or NULL if conversion was completed. |
- Returns
- length (in unsigned shorts) of the wide-char string or GF_UTF8_FAIL if error.
◆ gf_utf8_wcslen()
u32 gf_utf8_wcslen |
( |
const unsigned short * |
s | ) |
|
Gets the length in characters of a wide-char string
- Parameters
-
s | the wide-char string |
- Returns
- the wide-char string length
◆ gf_utf_get_string_from_bom()
GF_Err gf_utf_get_string_from_bom |
( |
const u8 * |
data, |
|
|
u32 |
size, |
|
|
char ** |
out_ptr, |
|
|
char ** |
result, |
|
|
u32 * |
res_size |
|
) |
| |
Returns string from data, potentially converting utf16 to utf8
- Parameters
-
data | the string or wide-char string |
size | size of the data buffer |
out_ptr | set to an allocated buffer if needed for conversion, shall be destroyed by caller. Must not be NULL |
result | set to resulting string. Must not be NULL |
res_size | set to length of resulting string. May be NULL |
- Returns
- error if any: GF_IO_ERR if UTF decode error or GF_BAD_PARAM
◆ gf_utf8_is_legal()
Bool gf_utf8_is_legal |
( |
const u8 * |
data, |
|
|
u32 |
size |
|
) |
| |
Checks if a given byte sequence is a valid UTF-8 encoding
- Parameters
-
data | the byte sequence buffer |
size | the length of the byte sequence |
- Returns
- GF_TRUE if valid UTF8, GF_FALSE otherwise
◆ gf_utf8_reorder_bidi()
Bool gf_utf8_reorder_bidi |
( |
u16 * |
utf_string, |
|
|
u32 |
len |
|
) |
| |
Performs a simple reordering of words in the string based on each word direction, so that glyphs are sorted in display order.
- Parameters
-
utf_string | the wide-char string |
len | the length of the wide-char string |
- Returns
- 1 if the main direction is right-to-left, 0 otherwise
◆ utf8_to_ucs4()
u32 utf8_to_ucs4 |
( |
u32 * |
ucs4_buf, |
|
|
u32 |
utf8_len, |
|
|
unsigned char * |
utf8_buf |
|
) |
| |
- Parameters
-
ucs4_buf | The UCS-4 buffer to fill |
utf8_len | The length of the UTF-8 buffer |
utf8_buf | The buffer containing the UTF-8 data |
- Returns
- the length of the ucs4_buf. Note that the ucs4_buf should be allocated by the caller and should be at least utf8_len * 4
◆ UTF8_MAX_BYTES_PER_CHAR
const u32 UTF8_MAX_BYTES_PER_CHAR = 4 |
|
static |
maximum character size in bytes