bes  Updated for version 3.20.5
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "SaxParserWrapper.h"
31 
32 #include <exception>
33 #include <iostream>
34 #include <libxml/parser.h>
35 #include <libxml/xmlstring.h>
36 #include <stdio.h> // for vsnprintf
37 #include <string>
38 
39 #include "BESDebug.h"
40 #include "BESError.h"
41 #include "BESInternalError.h"
42 #include "BESInternalFatalError.h"
43 #include "BESSyntaxUserError.h"
44 #include "BESForbiddenError.h"
45 #include "BESNotFoundError.h"
46 #include "NCMLDebug.h"
47 #include "SaxParser.h"
48 #include "XMLHelpers.h"
49 
50 // Toggle to tell the parser to use the Sax2 start/end element
51 // calls with namespace information.
52 // [ TODO We probably want to remove the non-namespace pathways at some point,
53 // but I will leave them here for now in case there's issues ]
54 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
55 
56 using namespace std;
57 using namespace ncml_module;
58 
60 // Helpers
61 
62 #if NCML_PARSER_USE_SAX2_NAMESPACES
63 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
64 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
65 {
66  attrMap.clear();
67  for (int i = 0; i < num_attributes; ++i) {
68  XMLAttribute attr;
69  attr.fromSAX2NamespaceAttributes(attributes);
70  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
71  attrMap.addAttribute(attr);
72  }
73  return num_attributes;
74 }
75 #else
76 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
77 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
78 {
79  attrMap.clear();
80  int count=0;
81  while (attrs && *attrs != NULL)
82  {
83  XMLAttribute attr;
84  attr.localname = XMLUtil::xmlCharToString(*attrs);
85  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
86  attrMap.addAttribute(attr);
87  attrs += 2;
88  count++;
89  }
90  return count;
91 }
92 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
93 
95 // Callback we will register that just pass on to our C++ engine
96 //
97 // NOTE WELL: New C handlers need to follow the given
98 // other examples in order to avoid memory leaks
99 // in libxml during an exception!
100 
// To avoid cut & paste below, the two macros that follow cast the void* user data into the
// wrapper and set up a proper error handling structure around the main call.
//
// BEGIN_SAFE_PARSER_BLOCK(userData):
//   - returns from the enclosing function at once when userData is null or the wrapper is
//     already in the exception state (so it may only be used in functions returning void);
//   - otherwise opens a try block, defines the symbol "parser" as the SaxParser contained in
//     the wrapper and passes the wrapper's current parse line to it.
// END_SAFE_PARSER_BLOCK:
//   - closes the try block and hands every exception (BESError, std::exception, anything else)
//     to the wrapper's deferException() instead of letting it propagate to the caller.
//
// So for example, a safe handler call to SaxParser would look like:
// static void ncmlStartDocument(void* userData)
//{
//   BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
//   parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
//   END_SAFE_PARSER_BLOCK; // end the error handling wrapper
//}

#define BEGIN_SAFE_PARSER_BLOCK(argName) { \
    SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
    if (!_spw_ || _spw_->isExceptionState()) \
    { \
        return; \
    } \
    else \
    { \
        try \
        { \
            SaxParser& parser = _spw_->getParser(); \
            parser.setParseLineNumber(_spw_->getCurrentParseLine());

// This is required after the end of the actual calls to the parser.
#define END_SAFE_PARSER_BLOCK } \
        catch (BESError& theErr) \
        { \
            BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
            _spw_->deferException(theErr); \
        } \
        catch (std::exception& ex) \
        { \
            BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
            BESInternalError _badness_("Wrapped std::exception.what()=" + string(ex.what()), __FILE__, __LINE__);\
            _spw_->deferException(_badness_); \
        } \
        catch (...) \
        { \
            BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
            BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
    } \
}
146 
148 // Our C SAX callbacks, wrapped carefully.
149 
150 static void ncmlStartDocument(void* userData)
151 {
152  BEGIN_SAFE_PARSER_BLOCK(userData)
153 
154  parser.onStartDocument();
155 
156  END_SAFE_PARSER_BLOCK;
157 }
158 
159 static void ncmlEndDocument(void* userData)
160 {
161  BEGIN_SAFE_PARSER_BLOCK(userData)
162 ; // BESDEBUG("ncml", "SaxParserWrapper::ncmlEndDocument() - BEGIN"<< endl);
163 
164  parser.onEndDocument();
165 
166  // BESDEBUG("ncml", "SaxParserWrapper::ncmlEndDocument() - END"<< endl);
167 
168  END_SAFE_PARSER_BLOCK;
169 }
170 
171 #if !NCML_PARSER_USE_SAX2_NAMESPACES
172 
173 static void ncmlStartElement(void * userData,
174  const xmlChar * name,
175  const xmlChar ** attrs)
176 {
177  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
178  BEGIN_SAFE_PARSER_BLOCK(userData);
179 
180  string nameS = XMLUtil::xmlCharToString(name);
181  XMLAttributeMap map;
182  toXMLAttributeMapNoNamespaces(map, attrs);
183 
184  // These args will be valid for the scope of the call.
185  parser.onStartElement(nameS, map);
186 
187  END_SAFE_PARSER_BLOCK;
188 }
189 
190 static void ncmlEndElement(void * userData,
191  const xmlChar * name)
192 {
193  BEGIN_SAFE_PARSER_BLOCK(userData);
194 
195  string nameS = XMLUtil::xmlCharToString(name);
196  parser.onEndElement(nameS);
197 
198  END_SAFE_PARSER_BLOCK;
199 }
200 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
201 
202 #if NCML_PARSER_USE_SAX2_NAMESPACES
203 static
204 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
205  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
206  const xmlChar **attributes)
207 {
208  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
209  BEGIN_SAFE_PARSER_BLOCK(userData);
210  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
211 
212  XMLAttributeMap attrMap;
213  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
214 
215  XMLNamespaceMap nsMap;
216  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
217 
218  // These args will be valid for the scope of the call.
219  string localnameString = XMLUtil::xmlCharToString(localname);
220  string prefixString = XMLUtil::xmlCharToString(prefix);
221  string uriString = XMLUtil::xmlCharToString(URI);
222 
223  parser.onStartElementWithNamespace(
224  localnameString,
225  prefixString,
226  uriString,
227  attrMap,
228  nsMap);
229 
230  END_SAFE_PARSER_BLOCK;
231 }
232 
233 static
234 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
235 {
236  BEGIN_SAFE_PARSER_BLOCK(userData);
237 
238  string localnameString = XMLUtil::xmlCharToString(localname);
239  string prefixString = XMLUtil::xmlCharToString(prefix);
240  string uriString = XMLUtil::xmlCharToString(URI);
241  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
242 
243  END_SAFE_PARSER_BLOCK;
244 }
245 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
246 
247 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
248 {
249  BEGIN_SAFE_PARSER_BLOCK(userData);
250 
251  // len is since the content string might not be null terminated,
252  // so we have to build out own and pass it up special....
253  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
254  string characters("");
255  characters.reserve(len);
256  const xmlChar* contentEnd = content+len;
257  while(content != contentEnd)
258  {
259  characters += (const char)(*content++);
260  }
261 
262  parser.onCharacters(characters);
263 
264  END_SAFE_PARSER_BLOCK;
265 }
266 
267 static void ncmlWarning(void* userData, const char* msg, ...)
268 {
269  BEGIN_SAFE_PARSER_BLOCK(userData);
270 
271  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
272 
273  char buffer[1024];
274  va_list(args);
275  va_start(args, msg);
276  unsigned int len = sizeof(buffer);
277  vsnprintf(buffer, len, msg, args);
278  va_end(args);
279  parser.onParseWarning(string(buffer));
280  END_SAFE_PARSER_BLOCK;
281 }
282 
283 static void ncmlFatalError(void* userData, const char* msg, ...)
284 {
285  BEGIN_SAFE_PARSER_BLOCK(userData);
286 
287  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
288 
289  char buffer[1024];
290  va_list(args);
291  va_start(args, msg);
292  unsigned int len = sizeof(buffer);
293  vsnprintf(buffer, len, msg, args);
294  va_end(args);
295  parser.onParseError(string(buffer));
296 
297  END_SAFE_PARSER_BLOCK;
298 }
299 
301 // class SaxParserWrapper impl
302 
303 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
304  _parser(parser), _handler(), // inits to all nulls.
305  /*_context(0),*/ _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
306 {
307 }
308 
309 SaxParserWrapper::~SaxParserWrapper()
310 {
311  // Really not much to do... everything cleans itself up.
312  _state = NOT_PARSING;
313 #if 0
314  // Leak fix. jhrg 6/21/19
315  cleanupParser();
316 #endif
317 
318 }
319 
320 bool SaxParserWrapper::parse(const string& ncmlFilename)
321 {
322  bool success = true;
323 
324  // It's illegal to call this until it's done.
325  if (_state == PARSING) {
326  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
327  }
328 
329  // OK, now we're parsing
330  _state = PARSING;
331 
332 
333  setupParser(ncmlFilename);
334 
335  success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
336 
337 #if 0
338  // Old way where we have no context.
339  // int errNo = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
340  // success = (errNo == 0);
341 
342  // Any BESError thrown in SaxParser callbacks will be deferred by the safe handler blocks
343  // So that we safely pass this line.
344  // Even if not, _context is cleared in dtor just in case.
345  xmlParseDocument(_context);
346 
347  success = (_context->errNo == 0);
348 
349  cleanupParser();
350 #endif
351 
352  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
353  if (isExceptionState()) {
355  }
356 
357  // Otherwise, we're also done parsing.
358  _state = NOT_PARSING;
359  return success;
360 }
361 
363 {
364  _state = EXCEPTION;
365  _errorType = theErr.get_bes_error_type();
366  _errorMsg = theErr.get_message();
367  _errorLine = theErr.get_line();
368  _errorFile = theErr.get_file();
369 }
370 
371 // HACK admittedly a little gross, but it's weird to have to copy an exception
372 // and this seemed the safest way rather than making dynamic storage, etc.
374 {
375  // Clear our state out so we can parse again though.
376  _state = NOT_PARSING;
377 
378  switch (_errorType) {
379  case BES_INTERNAL_ERROR:
380  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
381  break;
382 
383  case BES_INTERNAL_FATAL_ERROR:
384  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
385  break;
386 
387  case BES_SYNTAX_USER_ERROR:
388  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
389  break;
390 
391  case BES_FORBIDDEN_ERROR:
392  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
393  break;
394 
395  case BES_NOT_FOUND_ERROR:
396  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
397  break;
398 
399  default:
400  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
401  break;
402  }
403 }
404 
406 {
407 #if 0
408  if (_context) {
409  return xmlSAX2GetLineNumber(_context);
410  }
411  else {
412  return -1;
413  }
414 #endif
415  return -1; //FIXME part of leak fix. jhrg 6.21.19
416 }
417 
418 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
419 {
420  h.internalSubset = 0;
421  h.isStandalone = 0;
422  h.hasInternalSubset = 0;
423  h.hasExternalSubset = 0;
424  h.resolveEntity = 0;
425  h.getEntity = 0;
426  h.entityDecl = 0;
427  h.notationDecl = 0;
428  h.attributeDecl = 0;
429  h.elementDecl = 0;
430  h.unparsedEntityDecl = 0;
431  h.setDocumentLocator = 0;
432  h.startDocument = 0;
433  h.endDocument = 0;
434  h.startElement = 0;
435  h.endElement = 0;
436  h.reference = 0;
437  h.characters = 0;
438  h.ignorableWhitespace = 0;
439  h.processingInstruction = 0;
440  h.comment = 0;
441  h.warning = 0;
442  h.error = 0;
443  h.fatalError = 0;
444  h.getParameterEntity = 0;
445  h.cdataBlock = 0;
446  h.externalSubset = 0;
447 
448  // unsigned int initialized; magic number the init should fill in
449  /* The following fields are extensions available only on version 2 */
450  // void *_private; //i'd assume i don't set this either...
451  h.startElementNs = 0;
452  h.endElementNs = 0;
453  h.serror = 0;
454 }
455 
456 void SaxParserWrapper::setupParser(const string& filename)
457 {
458  // setup the handler for version 2,
459  // which sets an internal version magic number
460  // into _handler.initialized
461  // but which doesn't clear the handlers to 0.
462  xmlSAXVersion(&_handler, 2);
463 
464  // Initialize all handlers to 0 by hand to start
465  // so we don't blow those internal magic numbers.
466  setAllHandlerCBToNulls(_handler);
467 
468  // Put our static functions into the handler
469  _handler.startDocument = ncmlStartDocument;
470  _handler.endDocument = ncmlEndDocument;
471  _handler.warning = ncmlWarning;
472  _handler.error = ncmlFatalError;
473  _handler.fatalError = ncmlFatalError;
474  _handler.characters = ncmlCharacters;
475 
476  // We'll use one or the other until we're sure it works.
477 #if NCML_PARSER_USE_SAX2_NAMESPACES
478  _handler.startElement = 0;
479  _handler.endElement = 0;
480  _handler.startElementNs = ncmlSax2StartElementNs;
481  _handler.endElementNs = ncmlSax2EndElementNs;
482 #else
483  _handler.startElement = ncmlStartElement;
484  _handler.endElement = ncmlEndElement;
485  _handler.startElementNs = 0;
486  _handler.endElementNs = 0;
487 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
488 
489  // Create the non-validating parser context for the file
490  // using this as the userData for making exception-safe
491  // C++ calls.
492 
493 #if 0
494  // Leak fix. jhrg 6/21/19
495  _context = xmlCreateFileParserCtxt(filename.c_str());
496  if (!_context) {
497  THROW_NCML_PARSE_ERROR(-1, "Cannot parse: Unable to create a libxml parse context for " + filename);
498  }
499  _context->sax = &_handler;
500  _context->userData = this;
501  _context->validate = false;
502 #endif
503 }
504 
#if 0
// Leak fix. jhrg 6/21/19
// Compiled out: this definition and the body inside it are both disabled.
void SaxParserWrapper::cleanupParser() throw ()
{
#if 0
    // Leak fix. jhrg 6/21/19
    if (!_context) {
        return;
    }

    // Remove our handler from the context, then free the context.
    _context->sax = NULL;
    xmlFreeParserCtxt(_context);
    _context = 0;
#endif
}
#endif
522 
523 
error thrown if the resource requested cannot be found
exception thrown if an internal error is found and is fatal to the BES
exception thrown if internal error encountered
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
bool parse(const string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
error thrown if there is a user syntax error in the request or any other user error
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:318
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:165
error thrown if the BES is not allowed to access the resource requested
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:92
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115