OpenVAS Libraries  9.0.3
charcnv.c
Go to the documentation of this file.
1 /*
2  Unix SMB/CIFS implementation.
3  Character set conversion Extensions
4  Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5  Copyright (C) Andrew Tridgell 2001
6  Copyright (C) Simo Sorce 2001
7  Copyright (C) Martin Pool 2003
8 
9  This program is free software; you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation; either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 
23  MODIFICATIONS: only those functions that are required for OpenVAS are retained, others are removed
24  Modified By Preeti Subramanian <spreeti@secpod.com>
25  1. init_valid_table taken from samba/<source>/lib/util_unistr.c, using a dynamically created valid table only
26  2. valid_table taken from samba/<source>/lib/util_unistr.c
27  3. valid_table_use_unmap taken from samba/<source>/lib/util_unistr.c, BOOL is changed to bool
28  4. check_dos_char_slowly taken from samba/<source>/lib/util_unistr.c, smb_ucs2_t is changed to uint16
29  5. strlen_w taken from samba/<source>/lib/util_unistr.c, smb_ucs2_t is changed to uint16
30  6. strupper_m taken from samba/source/lib/util_str.c, and modified for OpenVAS
31  7. charset_name function changed for OpenVAS
32  8. in lazy_initialize_conv function, loading or generating the case handling tables removed
33  9. in init_iconv, init_doschar_table not required(removed)
34 */
#include <stdlib.h>

#include "byteorder.h"
#include "iconv.h"
#include "smb.h"
#include "proto.h"
#include "../misc/openvas_logging.h"
40 
41 #ifndef SMB_STRDUP
42 #define SMB_STRDUP(s) strdup(s)
43 #endif
44 
45 #ifndef uint8
46 #define uint8 uint8_t
47 #endif
48 
49 #ifndef uint16
50 #define uint16 uint16_t
51 #endif
52 
53 #ifndef _PUBLIC_
54 #define _PUBLIC_
55 #endif
56 
57 typedef unsigned int bool;
58 #define False 0
59 #define True 1
60 
61 static uint8 *valid_table_ntlmssp;
62 static bool valid_table_use_unmap_ntlmssp;
64  void const *src, size_t srclen,
65  void *dest, size_t destlen, bool allow_badcharcnv);
66 static int check_dos_char_slowly_ntlmssp(uint16 c)
67 {
68  char buf[10];
69  uint16_t c2 = 0;
70  int len1, len2;
71 
72  len1 = convert_string_ntlmssp(CH_UTF16LE, CH_DOS, &c, 2, buf, sizeof(buf),False);
73  if (len1 == 0) {
74  return 0;
75  }
76  len2 = convert_string_ntlmssp(CH_DOS, CH_UTF16LE, buf, len1, &c2, 2,False);
77  if (len2 != 2) {
78  return 0;
79  }
80  return (c == c2);
81 }
82 
83 /* We can parameterize this if someone complains.... JRA. */
84 
/*
 * Return the substitute character that is written in place of input that
 * could not be converted.  Currently always '_'.
 */
char lp_failed_convert_char_ntlmssp(void)
{
	return '_';
}
89 
105 static smb_iconv_t conv_handles_ntlmssp[NUM_CHARSETS][NUM_CHARSETS];
106 static bool conv_silent_ntlmssp; /* Should we do a debug if the conversion fails ? */
107 
109 {
110  static int mapped_file;
111  int i;
112  const char *allowed = ".!#$%&'()_-@^`~";
113 
114 if (mapped_file) {
115 /* Can't unmap files, so stick with what we have */
116  return;
117 }
118 
119 
120 /* we're using a dynamically created valid_table.
121  * It might need to be regenerated if the code page changed.
122  * We know that we're not using a mapped file, so we can
123  * free() the old one. */
124 
125 /* use free rather than unmap */
126 valid_table_use_unmap_ntlmssp = False;
127 
128  valid_table_ntlmssp = (uint8 *)SMB_MALLOC(0x10000);
129  for (i=0;i<128;i++) {
130  valid_table_ntlmssp[i] = isalnum(i) || strchr(allowed,i);
131  }
132 
134 
135  for (;i<0x10000;i++) {
136  uint16_t c;
137  SSVAL(&c, 0, i);
138  valid_table_ntlmssp[i] = check_dos_char_slowly_ntlmssp(c);
139  }
140 }
141 
142 /*******************************************************************
143  * Count the number of characters in a uint16_t string.
144  * ********************************************************************/
145 
146 size_t strlen_w_ntlmssp(const uint16 *src)
147 {
148  size_t len;
149  uint16 c;
150 
151  for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
152  ;
153  }
154 
155  return len;
156 }
157 
158 
162 static const char *charset_name_ntlmssp(charset_t ch)
163 {
164 const char *ret = NULL;
165 
166  if (ch == CH_UTF16LE) ret = "UTF-16LE";
167  else if (ch == CH_UTF16BE) ret = "UTF-16BE";
168  else if (ch == CH_UTF8) ret = "UTF8";
169 
170  #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
171  if (ret && !strcmp(ret, "LOCALE")) {
172  const char *ln = NULL;
173 
174  #ifdef HAVE_SETLOCALE
175  setlocale(LC_ALL, "");
176  #endif
177  ln = nl_langinfo(CODESET);
178  if (ln) {
179  /* Check whether the charset name is supported
180  by iconv */
181  smb_iconv_t handle = smb_iconv_open_ntlmssp(ln,"UCS-2LE");
182  if (handle == (smb_iconv_t) -1) {
183  ln = NULL;
184  } else {
185  smb_iconv_close_ntlmssp(handle);
186  }
187  }
188  ret = ln;
189 }
190 #endif
191 
192 if (!ret || !*ret) ret = "ASCII";
193 return ret;
194 }
195 
197 {
198  static int initialized = False;
199 
200  if (!initialized) {
201  initialized = True;
203  }
204 }
205 
206 
215 {
216  int c1, c2;
217  bool did_reload = False;
218 
219  /* so that charset_name() works we need to get the UNIX<->UCS2 going
220  first */
221  if (!conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE])
222  conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE] = smb_iconv_open_ntlmssp(charset_name_ntlmssp(CH_UTF16LE), "ASCII");
223 
224  if (!conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX])
225  conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX] = smb_iconv_open_ntlmssp("ASCII", charset_name_ntlmssp(CH_UTF16LE));
226 
227  for (c1=0;c1<NUM_CHARSETS;c1++) {
228  for (c2=0;c2<NUM_CHARSETS;c2++) {
229  const char *n1 = charset_name_ntlmssp((charset_t)c1);
230  const char *n2 = charset_name_ntlmssp((charset_t)c2);
231  if (conv_handles_ntlmssp[c1][c2] &&
232  strcmp(n1, conv_handles_ntlmssp[c1][c2]->from_name) == 0 &&
233  strcmp(n2, conv_handles_ntlmssp[c1][c2]->to_name) == 0)
234  continue;
235 
236  did_reload = True;
237 
238  if (conv_handles_ntlmssp[c1][c2])
239  smb_iconv_close_ntlmssp(conv_handles_ntlmssp[c1][c2]);
240 
241  conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp(n2,n1);
242  if (conv_handles_ntlmssp[c1][c2] == (smb_iconv_t)-1) {
243  if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
244  n1 = "ASCII";
245  }
246  if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
247  n2 = "ASCII";
248  }
249  conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp(n2,n1);
250  if (!conv_handles_ntlmssp[c1][c2]) {
251  log_legacy_write ("init_iconv_ntlmssp: conv_handle"
252  " initialization failed");
253  }
254  }
255  }
256  }
257 
258  if (did_reload) {
259  /* XXX: Does this really get called every time the dos
260  * codepage changes? */
261  /* XXX: Is the did_reload test too strict? */
262  conv_silent_ntlmssp = True;
264  conv_silent_ntlmssp = False;
265  }
266 }
267 
268 
284 static size_t convert_string_internal_ntlmssp(charset_t from, charset_t to,
285  void const *src, size_t srclen,
286  void *dest, size_t destlen, bool allow_bad_conv)
287 {
288  size_t i_len, o_len;
289  size_t retval;
290  const char* inbuf = (const char*)src;
291  char* outbuf = (char*)dest;
292  smb_iconv_t descriptor;
293 
295 
296  descriptor = conv_handles_ntlmssp[from][to];
297 
298  if (srclen == (size_t)-1) {
299  if (from == CH_UTF16LE || from == CH_UTF16BE) {
300  srclen = (strlen_w_ntlmssp((const uint16 *)src)+1) * 2;
301  } else {
302  srclen = strlen((const char *)src)+1;
303  }
304  }
305 
306 
307  if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
308  if (!conv_silent_ntlmssp)
309  return (size_t)-1;
310  }
311 
312  i_len=srclen;
313  o_len=destlen;
314 
315  again:
316 
317  retval = smb_iconv_ntlmssp(descriptor, &inbuf, &i_len, &outbuf, &o_len);
318  if(retval==(size_t)-1) {
319  switch(errno) {
320  case EINVAL:
321  /* Incomplete multibyte sequence */
322  if (!conv_silent_ntlmssp)
323  if (allow_bad_conv)
324  goto use_as_is;
325  return (size_t)-1;
326  case E2BIG:
327  /* No more room */
328  break;
329  case EILSEQ:
330  /* Illegal multibyte sequence */
331  if (allow_bad_conv)
332  goto use_as_is;
333 
334  return (size_t)-1;
335  default:
336  /* unknown error */
337  return (size_t)-1;
338  }
339  }
340  return destlen-o_len;
341 
342  use_as_is:
343 
344  /*
345  * Conversion not supported. This is actually an error, but there are so
346  * many misconfigured iconv systems and smb.conf's out there we can't just
347  * fail. Do a very bad conversion instead.... JRA.
348  */
349 
350  {
351  if (o_len == 0 || i_len == 0)
352  return destlen - o_len;
353 
354  if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
355  ((to != CH_UTF16LE)&&(to != CH_UTF16BE))) {
356  /* Can't convert from utf16 any endian to multibyte.
357  Replace with the default fail char.
358  */
359  if (i_len < 2)
360  return destlen - o_len;
361  if (i_len >= 2) {
362  *outbuf = lp_failed_convert_char_ntlmssp();
363 
364  outbuf++;
365  o_len--;
366 
367  inbuf += 2;
368  i_len -= 2;
369  }
370 
371  if (o_len == 0 || i_len == 0)
372  return destlen - o_len;
373 
374  /* Keep trying with the next char... */
375  goto again;
376 
377  } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
378  /* Can't convert to UTF16LE - just widen by adding the
379  default fail char then zero.
380  */
381  if (o_len < 2)
382  return destlen - o_len;
383 
384  outbuf[0] = lp_failed_convert_char_ntlmssp();
385  outbuf[1] = '\0';
386 
387  inbuf++;
388  i_len--;
389 
390  outbuf += 2;
391  o_len -= 2;
392 
393  if (o_len == 0 || i_len == 0)
394  return destlen - o_len;
395 
396  /* Keep trying with the next char... */
397  goto again;
398 
399  } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
400  to != CH_UTF16LE && to != CH_UTF16BE) {
401  /* Failed multibyte to multibyte. Just copy the default fail char and
402  try again. */
403  outbuf[0] = lp_failed_convert_char_ntlmssp();
404 
405  inbuf++;
406  i_len--;
407 
408  outbuf++;
409  o_len--;
410 
411  if (o_len == 0 || i_len == 0)
412  return destlen - o_len;
413 
414  /* Keep trying with the next char... */
415  goto again;
416 
417  } else {
418  /* Keep compiler happy.... */
419  return destlen - o_len;
420  }
421  }
422 }
423 
442  void const *src, size_t srclen,
443  void *dest, size_t destlen, bool allow_bad_conv)
444 {
445  /*
446  * NB. We deliberately don't do a strlen here if srclen == -1.
447  * This is very expensive over millions of calls and is taken
448  * care of in the slow path in convert_string_internal. JRA.
449  */
450 
451  if (srclen == 0)
452  return 0;
453 
454  if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
455  const unsigned char *p = (const unsigned char *)src;
456  unsigned char *q = (unsigned char *)dest;
457  size_t slen = srclen;
458  size_t dlen = destlen;
459  unsigned char lastp = '\0';
460  size_t retval = 0;
461 
462  /* If all characters are ascii, fast path here. */
463  while (slen && dlen) {
464  if ((lastp = *p) <= 0x7f) {
465  *q++ = *p++;
466  if (slen != (size_t)-1) {
467  slen--;
468  }
469  dlen--;
470  retval++;
471  if (!lastp)
472  break;
473  } else {
474  #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
475  goto general_case;
476  #else
477  size_t ret = convert_string_internal_ntlmssp(from, to, p, slen, q, dlen, allow_bad_conv);
478  if (ret == (size_t)-1) {
479  return ret;
480  }
481  return retval + ret;
482  #endif
483  }
484  }
485  if (!dlen) {
486  /* Even if we fast path we should note if we ran out of room. */
487  if (((slen != (size_t)-1) && slen) ||
488  ((slen == (size_t)-1) && lastp)) {
489  errno = E2BIG;
490  }
491  }
492  return retval;
493  } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
494  const unsigned char *p = (const unsigned char *)src;
495  unsigned char *q = (unsigned char *)dest;
496  size_t retval = 0;
497  size_t slen = srclen;
498  size_t dlen = destlen;
499  unsigned char lastp = '\0';
500 
501  /* If all characters are ascii, fast path here. */
502  while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
503  if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
504  *q++ = *p;
505  if (slen != (size_t)-1) {
506  slen -= 2;
507  }
508  p += 2;
509  dlen--;
510  retval++;
511  if (!lastp)
512  break;
513  } else {
514  #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
515  goto general_case;
516  #else
517  return retval + convert_string_internal_ntlmssp(from, to, p, slen, q, dlen, allow_bad_conv);
518  #endif
519  }
520  }
521  if (!dlen) {
522  /* Even if we fast path we should note if we ran out of room. */
523  if (((slen != (size_t)-1) && slen) ||
524  ((slen == (size_t)-1) && lastp)) {
525  errno = E2BIG;
526  }
527  }
528  return retval;
529  } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
530  const unsigned char *p = (const unsigned char *)src;
531  unsigned char *q = (unsigned char *)dest;
532  size_t retval = 0;
533  size_t slen = srclen;
534  size_t dlen = destlen;
535  unsigned char lastp = '\0';
536 
537  /* If all characters are ascii, fast path here. */
538  while (slen && (dlen >= 2)) {
539  if ((lastp = *p) <= 0x7F) {
540  *q++ = *p++;
541  *q++ = '\0';
542  if (slen != (size_t)-1) {
543  slen--;
544  }
545  dlen -= 2;
546  retval += 2;
547  if (!lastp)
548  break;
549  } else {
550  #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
551  goto general_case;
552  #else
553  return retval + convert_string_internal_ntlmssp(from, to, p, slen, q, dlen, allow_bad_conv);
554  #endif
555  }
556  }
557  if (!dlen) {
558  /* Even if we fast path we should note if we ran out of room. */
559  if (((slen != (size_t)-1) && slen) ||
560  ((slen == (size_t)-1) && lastp)) {
561  errno = E2BIG;
562  }
563  }
564  return retval;
565  }
566 
567  #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
568  general_case:
569  #endif
570  return convert_string_internal_ntlmssp(from, to, src, srclen, dest, destlen, allow_bad_conv);
571 }
charset_t
Definition: charset.h:30
void init_valid_table_ntlmssp(void)
Definition: charcnv.c:108
#define SSVAL(buf, pos, val)
Definition: byteorder.h:122
char lp_failed_convert_char_ntlmssp(void)
Definition: charcnv.c:85
Definition: charset.h:30
size_t convert_string_ntlmssp(charset_t from, charset_t to, void const *src, size_t srclen, void *dest, size_t destlen, bool allow_badcharcnv)
Definition: charcnv.c:441
void init_iconv_ntlmssp(void)
Definition: charcnv.c:214
unsigned int bool
Definition: charcnv.c:57
void log_legacy_write(const char *format,...)
Legacy function to write a log message.
smb_iconv_t smb_iconv_open_ntlmssp(const char *tocode, const char *fromcode)
Definition: iconv.c:105
#define uint16
Definition: charcnv.c:50
#define EILSEQ
Definition: iconv.h:50
size_t smb_iconv_ntlmssp(smb_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: iconv.c:59
#define COPY_UCS2_CHAR(dest, src)
Definition: smb.h:162
#define True
Definition: charcnv.c:59
#define False
Definition: charcnv.c:58
int smb_iconv_close_ntlmssp(smb_iconv_t cd)
Definition: iconv.c:195
void lazy_initialize_conv_ntlmssp(void)
Definition: charcnv.c:196
size_t strlen_w_ntlmssp(const uint16 *src)
Definition: charcnv.c:146
#define uint8
Definition: charcnv.c:46
#define NUM_CHARSETS
Definition: charset.h:32