uscript.h File Reference

C API: Unicode Script Information. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Enumerations

enum  UScriptCode {
  USCRIPT_INVALID_CODE = -1, USCRIPT_COMMON = 0, USCRIPT_INHERITED = 1, USCRIPT_ARABIC = 2,
  USCRIPT_ARMENIAN = 3, USCRIPT_BENGALI = 4, USCRIPT_BOPOMOFO = 5, USCRIPT_CHEROKEE = 6,
  USCRIPT_COPTIC = 7, USCRIPT_CYRILLIC = 8, USCRIPT_DESERET = 9, USCRIPT_DEVANAGARI = 10,
  USCRIPT_ETHIOPIC = 11, USCRIPT_GEORGIAN = 12, USCRIPT_GOTHIC = 13, USCRIPT_GREEK = 14,
  USCRIPT_GUJARATI = 15, USCRIPT_GURMUKHI = 16, USCRIPT_HAN = 17, USCRIPT_HANGUL = 18,
  USCRIPT_HEBREW = 19, USCRIPT_HIRAGANA = 20, USCRIPT_KANNADA = 21, USCRIPT_KATAKANA = 22,
  USCRIPT_KHMER = 23, USCRIPT_LAO = 24, USCRIPT_LATIN = 25, USCRIPT_MALAYALAM = 26,
  USCRIPT_MONGOLIAN = 27, USCRIPT_MYANMAR = 28, USCRIPT_OGHAM = 29, USCRIPT_OLD_ITALIC = 30,
  USCRIPT_ORIYA = 31, USCRIPT_RUNIC = 32, USCRIPT_SINHALA = 33, USCRIPT_SYRIAC = 34,
  USCRIPT_TAMIL = 35, USCRIPT_TELUGU = 36, USCRIPT_THAANA = 37, USCRIPT_THAI = 38,
  USCRIPT_TIBETAN = 39, USCRIPT_CANADIAN_ABORIGINAL = 40, USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, USCRIPT_YI = 41,
  USCRIPT_TAGALOG = 42, USCRIPT_HANUNOO = 43, USCRIPT_BUHID = 44, USCRIPT_TAGBANWA = 45,
  USCRIPT_BRAILLE = 46, USCRIPT_CYPRIOT = 47, USCRIPT_LIMBU = 48, USCRIPT_LINEAR_B = 49,
  USCRIPT_OSMANYA = 50, USCRIPT_SHAVIAN = 51, USCRIPT_TAI_LE = 52, USCRIPT_UGARITIC = 53,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54, USCRIPT_BUGINESE = 55, USCRIPT_GLAGOLITIC = 56, USCRIPT_KHAROSHTHI = 57,
  USCRIPT_SYLOTI_NAGRI = 58, USCRIPT_NEW_TAI_LUE = 59, USCRIPT_TIFINAGH = 60, USCRIPT_OLD_PERSIAN = 61,
  USCRIPT_BALINESE = 62, USCRIPT_BATAK = 63, USCRIPT_BLISSYMBOLS = 64, USCRIPT_BRAHMI = 65,
  USCRIPT_CHAM = 66, USCRIPT_CIRTH = 67, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, USCRIPT_DEMOTIC_EGYPTIAN = 69,
  USCRIPT_HIERATIC_EGYPTIAN = 70, USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, USCRIPT_KHUTSURI = 72, USCRIPT_SIMPLIFIED_HAN = 73,
  USCRIPT_TRADITIONAL_HAN = 74, USCRIPT_PAHAWH_HMONG = 75, USCRIPT_OLD_HUNGARIAN = 76, USCRIPT_HARAPPAN_INDUS = 77,
  USCRIPT_JAVANESE = 78, USCRIPT_KAYAH_LI = 79, USCRIPT_LATIN_FRAKTUR = 80, USCRIPT_LATIN_GAELIC = 81,
  USCRIPT_LEPCHA = 82, USCRIPT_LINEAR_A = 83, USCRIPT_MANDAEAN = 84, USCRIPT_MAYAN_HIEROGLYPHS = 85,
  USCRIPT_MEROITIC = 86, USCRIPT_NKO = 87, USCRIPT_ORKHON = 88, USCRIPT_OLD_PERMIC = 89,
  USCRIPT_PHAGS_PA = 90, USCRIPT_PHOENICIAN = 91, USCRIPT_PHONETIC_POLLARD = 92, USCRIPT_RONGORONGO = 93,
  USCRIPT_SARATI = 94, USCRIPT_ESTRANGELO_SYRIAC = 95, USCRIPT_WESTERN_SYRIAC = 96, USCRIPT_EASTERN_SYRIAC = 97,
  USCRIPT_TENGWAR = 98, USCRIPT_VAI = 99, USCRIPT_VISIBLE_SPEECH = 100, USCRIPT_CUNEIFORM = 101,
  USCRIPT_UNWRITTEN_LANGUAGES = 102, USCRIPT_UNKNOWN = 103, USCRIPT_CODE_LIMIT = 104
}
 Constants for ISO 15924 script codes. More...

Functions

int32_t uscript_getCode (const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
 Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
const char * uscript_getName (UScriptCode scriptCode)
 Gets a script name associated with the given script code.
const char * uscript_getShortName (UScriptCode scriptCode)
 Gets the short (abbreviated) script name associated with the given script code.
UScriptCode uscript_getScript (UChar32 codepoint, UErrorCode *err)
 Gets the script code associated with the given codepoint.


Detailed Description

C API: Unicode Script Information.

Definition in file uscript.h.


Enumeration Type Documentation

enum UScriptCode

Constants for ISO 15924 script codes.

Many of these script codes - those from Unicode's ScriptNames.txt - are character property values for Unicode's Script property. See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).

Starting with ICU 3.6, constants for most ISO 15924 script codes are included (currently excluding private-use codes Qaaa..Qabx). For scripts for which there are codes in ISO 15924 but which are not used in the Unicode Character Database (UCD), there are no Unicode characters associated with those scripts.

For example, there are no characters that have a UCD script code of Hans or Hant. All Han ideographs have the Hani script code. The Hans and Hant script codes are used with CLDR data.

ISO 15924 script codes are included for use with CLDR and similar.

Stable:
ICU 2.2
Enumerator:
USCRIPT_INVALID_CODE 
USCRIPT_COMMON 
USCRIPT_INHERITED 
USCRIPT_ARABIC 
USCRIPT_ARMENIAN 
USCRIPT_BENGALI 
USCRIPT_BOPOMOFO 
USCRIPT_CHEROKEE 
USCRIPT_COPTIC 
USCRIPT_CYRILLIC 
USCRIPT_DESERET 
USCRIPT_DEVANAGARI 
USCRIPT_ETHIOPIC 
USCRIPT_GEORGIAN 
USCRIPT_GOTHIC 
USCRIPT_GREEK 
USCRIPT_GUJARATI 
USCRIPT_GURMUKHI 
USCRIPT_HAN 
USCRIPT_HANGUL 
USCRIPT_HEBREW 
USCRIPT_HIRAGANA 
USCRIPT_KANNADA 
USCRIPT_KATAKANA 
USCRIPT_KHMER 
USCRIPT_LAO 
USCRIPT_LATIN 
USCRIPT_MALAYALAM 
USCRIPT_MONGOLIAN 
USCRIPT_MYANMAR 
USCRIPT_OGHAM 
USCRIPT_OLD_ITALIC 
USCRIPT_ORIYA 
USCRIPT_RUNIC 
USCRIPT_SINHALA 
USCRIPT_SYRIAC 
USCRIPT_TAMIL 
USCRIPT_TELUGU 
USCRIPT_THAANA 
USCRIPT_THAI 
USCRIPT_TIBETAN 
USCRIPT_CANADIAN_ABORIGINAL  Canadian_Aboriginal script.

Stable:
ICU 2.6
USCRIPT_UCAS  Canadian_Aboriginal script (alias).

Stable:
ICU 2.2
USCRIPT_YI 
USCRIPT_TAGALOG 
USCRIPT_HANUNOO 
USCRIPT_BUHID 
USCRIPT_TAGBANWA 
USCRIPT_BRAILLE 
USCRIPT_CYPRIOT 
USCRIPT_LIMBU 
USCRIPT_LINEAR_B 
USCRIPT_OSMANYA 
USCRIPT_SHAVIAN 
USCRIPT_TAI_LE 
USCRIPT_UGARITIC 
USCRIPT_KATAKANA_OR_HIRAGANA  New script code in Unicode 4.0.1.

Stable:
ICU 3.0
USCRIPT_BUGINESE 
USCRIPT_GLAGOLITIC 
USCRIPT_KHAROSHTHI 
USCRIPT_SYLOTI_NAGRI 
USCRIPT_NEW_TAI_LUE 
USCRIPT_TIFINAGH 
USCRIPT_OLD_PERSIAN 
USCRIPT_BALINESE 
USCRIPT_BATAK 
USCRIPT_BLISSYMBOLS 
USCRIPT_BRAHMI 
USCRIPT_CHAM 
USCRIPT_CIRTH 
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC 
USCRIPT_DEMOTIC_EGYPTIAN 
USCRIPT_HIERATIC_EGYPTIAN 
USCRIPT_EGYPTIAN_HIEROGLYPHS 
USCRIPT_KHUTSURI 
USCRIPT_SIMPLIFIED_HAN 
USCRIPT_TRADITIONAL_HAN 
USCRIPT_PAHAWH_HMONG 
USCRIPT_OLD_HUNGARIAN 
USCRIPT_HARAPPAN_INDUS 
USCRIPT_JAVANESE 
USCRIPT_KAYAH_LI 
USCRIPT_LATIN_FRAKTUR 
USCRIPT_LATIN_GAELIC 
USCRIPT_LEPCHA 
USCRIPT_LINEAR_A 
USCRIPT_MANDAEAN 
USCRIPT_MAYAN_HIEROGLYPHS 
USCRIPT_MEROITIC 
USCRIPT_NKO 
USCRIPT_ORKHON 
USCRIPT_OLD_PERMIC 
USCRIPT_PHAGS_PA 
USCRIPT_PHOENICIAN 
USCRIPT_PHONETIC_POLLARD 
USCRIPT_RONGORONGO 
USCRIPT_SARATI 
USCRIPT_ESTRANGELO_SYRIAC 
USCRIPT_WESTERN_SYRIAC 
USCRIPT_EASTERN_SYRIAC 
USCRIPT_TENGWAR 
USCRIPT_VAI 
USCRIPT_VISIBLE_SPEECH 
USCRIPT_CUNEIFORM 
USCRIPT_UNWRITTEN_LANGUAGES 
USCRIPT_UNKNOWN 
USCRIPT_CODE_LIMIT 

Definition at line 45 of file uscript.h.


Function Documentation

int32_t uscript_getCode ( const char *  nameOrAbbrOrLocale,
UScriptCode *  fillIn,
int32_t  capacity,
UErrorCode *  err 
)

Gets script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does a fast lookup without accessing the locale data.

Parameters:
nameOrAbbrOrLocale name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
fillIn the UScriptCode buffer to fill in the script code
capacity the capacity (size) of the UScriptCode buffer passed in.
err the error status code.
Returns:
The number of script codes filled in the buffer passed in
Stable:
ICU 2.4

const char* uscript_getName ( UScriptCode  scriptCode  ) 

Gets a script name associated with the given script code.

Returns "Malayalam" given USCRIPT_MALAYALAM

Parameters:
scriptCode UScriptCode enum
Returns:
script long name as given in PropertyValueAliases.txt, or NULL if scriptCode is invalid
Stable:
ICU 2.4

UScriptCode uscript_getScript ( UChar32  codepoint,
UErrorCode *  err 
)

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02

Parameters:
codepoint UChar32 codepoint
err the error status code.
Returns:
The UScriptCode, or 0 (the value of USCRIPT_COMMON) if codepoint is invalid
Stable:
ICU 2.4

const char* uscript_getShortName ( UScriptCode  scriptCode  ) 

Gets the short (abbreviated) script name associated with the given script code.

Returns "Mlym" given USCRIPT_MALAYALAM

Parameters:
scriptCode UScriptCode enum
Returns:
script abbreviated name as given in PropertyValueAliases.txt, or NULL if scriptCode is invalid
Stable:
ICU 2.4


Generated on Mon Aug 13 07:17:27 2007 for ICU 3.6 by  doxygen 1.5.2