libgpac
Documentation of the core library of GPAC
utf.c File Reference
#include <gpac/utf.h>
+ Include dependency graph for utf.c:

Macros

#define UNI_REPLACEMENT_CHAR   (UTF32)0x0000FFFD
 
#define UNI_MAX_BMP   (UTF32)0x0000FFFF
 
#define UNI_MAX_UTF16   (UTF32)0x0010FFFF
 
#define UNI_MAX_UTF32   (UTF32)0x7FFFFFFF
 
#define UNI_MAX_LEGAL_UTF32   (UTF32)0x0010FFFF
 
#define UNI_SUR_HIGH_START   (UTF32)0xD800
 
#define UNI_SUR_HIGH_END   (UTF32)0xDBFF
 
#define UNI_SUR_LOW_START   (UTF32)0xDC00
 
#define UNI_SUR_LOW_END   (UTF32)0xDFFF
 
#define false   0
 
#define true   1
 

Typedefs

typedef u32 UTF32
 
typedef u16 UTF16
 
typedef u8 UTF8
 
typedef u8 Boolean
 

Enumerations

enum  ConversionResult { conversionOK , sourceExhausted , targetExhausted , sourceIllegal }
 
enum  ConversionFlags { strictConversion = 0 , lenientConversion }
 

Functions

ConversionResult ConvertUTF16toUTF8 (const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
 
Boolean isLegalUTF8 (const UTF8 *source, int length)
 
ConversionResult ConvertUTF8toUTF16 (const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
 
GF_EXPORT Bool gf_utf8_is_legal (const u8 *data, u32 length)
 Checks validity of a UTF8 string. More...
 
GF_EXPORT u32 gf_utf8_wcslen (const unsigned short *s)
 wide-char string length More...
 
GF_EXPORT u32 gf_utf8_wcstombs (char *dest, size_t len, const unsigned short **srcp)
 wide-char to multibyte conversion More...
 
GF_EXPORT u32 gf_utf8_mbstowcs (unsigned short *dest, size_t len, const char **srcp)
 multibyte to wide-char conversion More...
 
GF_EXPORT GF_Err gf_utf_get_string_from_bom (const u8 *data, u32 size, char **out_ptr, char **result, u32 *res_size)
 Returns a string from a string starting with a BOM. More...
 

Variables

static const int halfShift = 10
 
static const UTF32 halfBase = 0x0010000UL
 
static const UTF32 halfMask = 0x3FFUL
 
static const char trailingBytesForUTF8 [256]
 
static const UTF32 offsetsFromUTF8 [6]
 
static const UTF8 firstByteMark [7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }
 

Macro Definition Documentation

◆ UNI_REPLACEMENT_CHAR

#define UNI_REPLACEMENT_CHAR   (UTF32)0x0000FFFD

◆ UNI_MAX_BMP

#define UNI_MAX_BMP   (UTF32)0x0000FFFF

◆ UNI_MAX_UTF16

#define UNI_MAX_UTF16   (UTF32)0x0010FFFF

◆ UNI_MAX_UTF32

#define UNI_MAX_UTF32   (UTF32)0x7FFFFFFF

◆ UNI_MAX_LEGAL_UTF32

#define UNI_MAX_LEGAL_UTF32   (UTF32)0x0010FFFF

◆ UNI_SUR_HIGH_START

#define UNI_SUR_HIGH_START   (UTF32)0xD800

◆ UNI_SUR_HIGH_END

#define UNI_SUR_HIGH_END   (UTF32)0xDBFF

◆ UNI_SUR_LOW_START

#define UNI_SUR_LOW_START   (UTF32)0xDC00

◆ UNI_SUR_LOW_END

#define UNI_SUR_LOW_END   (UTF32)0xDFFF

◆ false

#define false   0

◆ true

#define true   1

Typedef Documentation

◆ UTF32

typedef u32 UTF32

◆ UTF16

typedef u16 UTF16

◆ UTF8

typedef u8 UTF8

◆ Boolean

typedef u8 Boolean

Enumeration Type Documentation

◆ ConversionResult

Enumerator
conversionOK 
sourceExhausted 
targetExhausted 
sourceIllegal 

◆ ConversionFlags

Enumerator
strictConversion 
lenientConversion 

Function Documentation

◆ ConvertUTF16toUTF8()

ConversionResult ConvertUTF16toUTF8 ( const UTF16 **  sourceStart,
const UTF16 *  sourceEnd,
UTF8 **  targetStart,
UTF8 *  targetEnd,
ConversionFlags  flags 
)
+ Here is the caller graph for this function:

◆ isLegalUTF8()

Boolean isLegalUTF8 ( const UTF8 *  source,
int  length 
)
+ Here is the caller graph for this function:

◆ ConvertUTF8toUTF16()

ConversionResult ConvertUTF8toUTF16 ( const UTF8 **  sourceStart,
const UTF8 *  sourceEnd,
UTF16 **  targetStart,
UTF16 *  targetEnd,
ConversionFlags  flags 
)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ halfShift

const int halfShift = 10
static

◆ halfBase

const UTF32 halfBase = 0x0010000UL
static

◆ halfMask

const UTF32 halfMask = 0x3FFUL
static

◆ trailingBytesForUTF8

const char trailingBytesForUTF8[256]
static
Initial value:
= {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
}

◆ offsetsFromUTF8

const UTF32 offsetsFromUTF8[6]
static
Initial value:
= { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
}

◆ firstByteMark

const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }
static