1. ------------------------------------------------------------------------------ 
  2. --                  GtkAda - Ada95 binding for Gtk+/Gnome                   -- 
  3. --                                                                          -- 
  4. --                     Copyright (C) 2003-2014, AdaCore                     -- 
  5. --                                                                          -- 
  6. -- This library is free software;  you can redistribute it and/or modify it -- 
  7. -- under terms of the  GNU General Public License  as published by the Free -- 
  8. -- Software  Foundation;  either version 3,  or (at your  option) any later -- 
  9. -- version. This library is distributed in the hope that it will be useful, -- 
  10. -- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- -- 
  11. -- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            -- 
  12. --                                                                          -- 
  13. -- As a special exception under Section 7 of GPL version 3, you are granted -- 
  14. -- additional permissions described in the GCC Runtime Library Exception,   -- 
  15. -- version 3.1, as published by the Free Software Foundation.               -- 
  16. --                                                                          -- 
  17. -- You should have received a copy of the GNU General Public License and    -- 
  18. -- a copy of the GCC Runtime Library Exception along with this program;     -- 
  19. -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    -- 
  20. -- <http://www.gnu.org/licenses/>.                                          -- 
  21. --                                                                          -- 
  22. ------------------------------------------------------------------------------ 
  23.  
  24. --  <description> 
  25. -- 
  26. --  This package provides functions for handling Unicode characters and
  27. --  UTF-8 encoded strings. See also Glib.Convert.
  28. -- 
  29. --  </description> 
  30. --  <c_version>2.2.1</c_version> 
  31. --  <group>Glib, the general-purpose library</group> 
  32.  
  33. with Interfaces.C.Strings; 
  34.  
  35. package Glib.Unicode is 
  36.    pragma Preelaborate;
  37.
  38.    package ICS renames Interfaces.C.Strings;  --  Short alias for C strings
  39.  
  40.    procedure UTF8_Validate
  41.      (Str         : UTF8_String;
  42.       Valid       : out Boolean;
  43.       Invalid_Pos : out Natural);
  44.    --  Validate a UTF8 string: Valid is set to True if Str is valid.
  45.    --  Invalid_Pos is set to the position of the first invalid byte, if any.
  46.  
  47.    ----------------------- 
  48.    -- Character classes -- 
  49.    ----------------------- 
  50.  
  51.    type G_Unicode_Type is
  52.      (Unicode_Control,
  53.       Unicode_Format,
  54.       Unicode_Unassigned,
  55.       Unicode_Private_Use,
  56.       Unicode_Surrogate,
  57.       Unicode_Lowercase_Letter,
  58.       Unicode_Modifier_Letter,
  59.       Unicode_Other_Letter,
  60.       Unicode_Titlecase_Letter,
  61.       Unicode_Uppercase_Letter,
  62.       Unicode_Combining_Mark,
  63.       Unicode_Enclosing_Mark,
  64.       Unicode_Non_Spacing_Mark,
  65.       Unicode_Decimal_Number,
  66.       Unicode_Letter_Number,
  67.       Unicode_Other_Number,
  68.       Unicode_Connect_Punctuation,
  69.       Unicode_Dash_Punctuation,
  70.       Unicode_Close_Punctuation,
  71.       Unicode_Final_Punctuation,
  72.       Unicode_Initial_Punctuation,
  73.       Unicode_Other_Punctuation,
  74.       Unicode_Open_Punctuation,
  75.       Unicode_Currency_Symbol,
  76.       Unicode_Modifier_Symbol,
  77.       Unicode_Math_Symbol,
  78.       Unicode_Other_Symbol,
  79.       Unicode_Line_Separator,
  80.       Unicode_Paragraph_Separator,
  81.       Unicode_Space_Separator);
  82.    --  The possible character classifications (convention C, do not reorder).
  83.    --  See http://www.unicode.org/Public/UNIDATA/UCD.html
  84.  
  85.    function Is_Space (Char : Gunichar) return Boolean;
  86.    --  Return True if Char is a space character.
  87.
  88.    function Is_Alnum (Char : Gunichar) return Boolean;
  89.    --  Return True if Char is an alphabetic or numeric character.
  90.
  91.    function Is_Alpha (Char : Gunichar) return Boolean;
  92.    --  Return True if Char is an alphabetic character.
  93.
  94.    function Is_Digit (Char : Gunichar) return Boolean;
  95.    --  Return True if Char is a digit.
  96.
  97.    function Is_Lower (Char : Gunichar) return Boolean;
  98.    --  Return True if Char is a lower-case character.
  99.
  100.    function Is_Upper (Char : Gunichar) return Boolean;
  101.    --  Return True if Char is an upper-case character.
  102.
  103.    function Is_Punct (Char : Gunichar) return Boolean;
  104.    --  Return True if Char is a punctuation character.
  105.  
  106.    function Unichar_Type (Char : Gunichar) return G_Unicode_Type;
  107.    --  Return the Unicode classification of Char (binds g_unichar_type).
  108.  
  109.    ------------------- 
  110.    -- Case handling -- 
  111.    ------------------- 
  112.  
  113.    function To_Lower (Char : Gunichar) return Gunichar;
  114.    --  Convert Char to lower case (binds g_unichar_tolower).
  115.  
  116.    function To_Upper (Char : Gunichar) return Gunichar;
  117.    --  Convert Char to upper case (binds g_unichar_toupper).
  118.  
  119.    function UTF8_Strdown
  120.      (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr;
  121.    pragma Import (C, UTF8_Strdown, "g_utf8_strdown");
  122.    --  Convert all characters in Str to lowercase (binds g_utf8_strdown).
  123.    --  The result must be freed by the user; its length can differ from Str.
  124.    --  Len is presumably Str's byte length, -1 if nul-terminated; TODO confirm
  125.  
  126.    function UTF8_Strdown (Str : UTF8_String) return UTF8_String;
  127.    --  Return Str with all its characters converted to lower case.
  128.  
  129.    function UTF8_Strup
  130.      (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr;
  131.    pragma Import (C, UTF8_Strup, "g_utf8_strup");
  132.    --  Convert all characters in Str to uppercase (binds g_utf8_strup).
  133.    --  The resulting string is newly allocated, and can have a different
  134.    --  length than Str (for instance, the German ess-zet is converted to SS).
  135.    --  The returned string must be freed by the caller.
  136.  
  137.    function UTF8_Strup (Str : UTF8_String) return UTF8_String;
  138.    --  Return Str with all its characters converted to upper case.
  139.  
  140.    --------------------------- 
  141.    --  Manipulating strings -- 
  142.    --------------------------- 
  143.  
  144.    function UTF8_Strlen
  145.      (Str : ICS.chars_ptr; Max : Integer := -1) return Glong;
  146.    pragma Import (C, UTF8_Strlen, "g_utf8_strlen");
  147.    --  Return the length, in characters, of the UTF8-encoded string Str.
  148.    --  Max is the maximal number of bytes to examine. If it is negative, then
  149.    --  the string is assumed to be nul-terminated.
  150.  
  151.    function UTF8_Strlen (Str : UTF8_String) return Glong;
  152.    --  Return the number of UTF8 characters in Str.
  153.  
  154.    function UTF8_Find_Next_Char
  155.      (Str     : ICS.chars_ptr;
  156.       Str_End : ICS.chars_ptr := ICS.Null_Ptr) return ICS.chars_ptr;
  157.    pragma Import (C, UTF8_Find_Next_Char, "g_utf8_find_next_char");
  158.    --  Find the start of the next UTF8 character after Str.
  159.    --  Str_End points to the end of the string. If it is Null_Ptr, the string
  160.    --  must be nul-terminated. Binding to g_utf8_find_next_char.
  161.  
  162.    function UTF8_Find_Next_Char
  163.      (Str : UTF8_String; Index : Natural) return Natural;
  164.    pragma Inline (UTF8_Find_Next_Char);
  165.    --  Return the start of the next UTF8 character after the Index-th byte.
  166.    --  Index doesn't need to be on the start of a character.
  167.    --  The result is a value greater than Str'Last if there is no more
  168.    --  character.
  169.  
  170.    function UTF8_Next_Char
  171.      (Str : UTF8_String; Index : Natural) return Natural;
  172.    pragma Inline (UTF8_Next_Char);
  173.    --  Return the start of the next UTF8 character after the Index-th byte.
  174.    --  Index has to be on the start of a character.
  175.    --  The result is a value greater than Str'Last if there is no more
  176.    --  character.
  177.  
  178.    function UTF8_Find_Prev_Char
  179.      (Str_Start : ICS.chars_ptr; Str : ICS.chars_ptr) return ICS.chars_ptr;
  180.    pragma Import (C, UTF8_Find_Prev_Char, "g_utf8_find_prev_char");
  181.    --  Find the start of the previous UTF8 character before Str.
  182.    --  Str_Start is a pointer to the beginning of the string.
  183.    --  Null_Ptr is returned if there is no previous character.
  184.  
  185.    function UTF8_Find_Prev_Char
  186.      (Str : UTF8_String; Index : Natural) return Natural;
  187.    --  Return the start of the previous UTF8 character before the Index-th byte.
  188.    --  Index doesn't need to be on the start of a character.
  189.    --  The result is a value smaller than Str'First if there is no
  190.    --  previous character.
  191.  
  192.    ----------------- 
  193.    -- Conversions -- 
  194.    ----------------- 
  195.  
  196.    function Unichar_To_UTF8
  197.      (C : Gunichar; Buffer : ICS.chars_ptr := ICS.Null_Ptr) return Natural;
  198.    pragma Import (C, Unichar_To_UTF8, "g_unichar_to_utf8");
  199.    --  Encode C into Buffer, which must have at least 6 bytes free.
  200.    --  Return the number of bytes written in Buffer.
  201.    --  If Buffer is Null_Ptr, then the only effect is to compute the number of
  202.    --  bytes needed to encode C. Binding to g_unichar_to_utf8.
  203.  
  204.    procedure Unichar_To_UTF8
  205.      (C      : Gunichar;
  206.       Buffer : out UTF8_String;
  207.       Last   : out Natural);
  208.    --  Encode C into Buffer. Buffer must have at least 6 bytes free.
  209.    --  Last is set to the index of the last byte written in Buffer.
  210.  
  211.    function UTF8_Get_Char (Str : UTF8_String) return Gunichar;
  212.    --  Convert a sequence of bytes encoded as UTF8 to a Unicode character.
  213.    --  If Str doesn't hold a valid UTF8 encoded character, the result is
  214.    --  undefined.
  215.  
  216.    function UTF8_Get_Char_Validated (Str : UTF8_String) return Gunichar;
  217.    --  Same as UTF8_Get_Char. However, if the sequence is an incomplete start
  218.    --  of a possibly valid character, it returns -2. If the sequence is
  219.    --  invalid, it returns -1.
  220.    --  ??? Gunichar is unsigned, so -1 and -2 presumably show up as
  221.    --  Gunichar'Last and Gunichar'Last - 1 respectively; to be confirmed.
  222.  
  223. private 
  224.    pragma Convention (C, G_Unicode_Type);  --  Result type of Unichar_Type
  225.    pragma Import (C, To_Upper, "g_unichar_toupper");  --  Direct C bindings
  226.    pragma Import (C, To_Lower, "g_unichar_tolower");
  227.    pragma Import (C, Unichar_Type, "g_unichar_type");
  228. end Glib.Unicode;