libdap++  Updated for version 3.14.0
DDXParserSAX2.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG 1
29 //#define DODS_DEBUG2 1
30 
31 #include <cstring>
32 #include <cstdarg>
33 
34 #include "BaseType.h"
35 #include "Byte.h"
36 #include "Int16.h"
37 #include "UInt16.h"
38 #include "Int32.h"
39 #include "UInt32.h"
40 #include "Float32.h"
41 #include "Float64.h"
42 #include "Str.h"
43 #include "Url.h"
44 #include "Array.h"
45 #include "Structure.h"
46 #include "Sequence.h"
47 #include "Grid.h"
48 
49 #include "DDXParserSAX2.h"
50 
51 #include "util.h"
52 #include "mime_util.h"
53 #include "debug.h"
54 
55 namespace libdap {
56 
#if defined(DODS_DEBUG) || defined(DODS_DEBUG2)
// Printable names for the parser states. Only the DBG/DBG2/DBGN tracing
// statements in this file use the table, indexing it with the value returned
// by DDXParser::get_state(); the entries therefore have to stay in the same
// order as the ParseState enumerators (declared outside this file --
// presumably in DDXParserSAX2.h; keep the two in sync).
static const char *states[] =
    {
        "start",

        "dataset",

        "attribute_container",
        "attribute",
        "attribute_value",
        "other_xml_attribute",

        "alias",

        "simple_type",

        "array",
        "dimension",

        "grid",
        "map",

        "structure",
        "sequence",

        "blob href",

        "unknown",
        "error"
    };
#endif
88 // Glue the BaseTypeFactory to the enum-based factory defined statically
89 // here.
90 
91 BaseType *DDXParser::factory(Type t, const string & name)
92 {
93  switch (t) {
94  case dods_byte_c:
95  return d_factory->NewByte(name);
96  break;
97 
98  case dods_int16_c:
99  return d_factory->NewInt16(name);
100  break;
101 
102  case dods_uint16_c:
103  return d_factory->NewUInt16(name);
104  break;
105 
106  case dods_int32_c:
107  return d_factory->NewInt32(name);
108  break;
109 
110  case dods_uint32_c:
111  return d_factory->NewUInt32(name);
112  break;
113 
114  case dods_float32_c:
115  return d_factory->NewFloat32(name);
116  break;
117 
118  case dods_float64_c:
119  return d_factory->NewFloat64(name);
120  break;
121 
122  case dods_str_c:
123  return d_factory->NewStr(name);
124  break;
125 
126  case dods_url_c:
127  return d_factory->NewUrl(name);
128  break;
129 
130  case dods_array_c:
131  return d_factory->NewArray(name);
132  break;
133 
134  case dods_structure_c:
135  return d_factory->NewStructure(name);
136  break;
137 
138  case dods_sequence_c:
139  return d_factory->NewSequence(name);
140  break;
141 
142  case dods_grid_c:
143  return d_factory->NewGrid(name);
144  break;
145 
146  default:
147  return 0;
148  }
149 }
150 
#if 0

// Compiled out. Maps a DDX element name onto the matching libdap type code
// and returns dods_null_c for any name that is not listed. The get_type()
// actually called by this file must come from elsewhere (presumably one of
// the included headers).
static Type get_type(const char *name)
{
    static const struct {
        const char *tag;
        Type type;
    } known[] = {
        { "Byte", dods_byte_c },
        { "Int16", dods_int16_c },
        { "UInt16", dods_uint16_c },
        { "Int32", dods_int32_c },
        { "UInt32", dods_uint32_c },
        { "Float32", dods_float32_c },
        { "Float64", dods_float64_c },
        { "String", dods_str_c },
        { "Url", dods_url_c },
        { "Array", dods_array_c },
        { "Structure", dods_structure_c },
        { "Sequence", dods_sequence_c },
        { "Grid", dods_grid_c }
    };

    const unsigned int count = sizeof(known) / sizeof(known[0]);
    for (unsigned int k = 0; k < count; ++k) {
        if (strcmp(name, known[k].tag) == 0)
            return known[k].type;
    }

    return dods_null_c;
}
#endif
197 
#if 0
// Not used. jhrg 1/17/13
// Compiled out. Looks up the type code for 'name' and hands it back only
// when it is one of the scalar types (Byte ... Url); every other name yields
// dods_null_c.
static Type is_simple_type(const char *name)
{
    const Type t = get_type(name);

    const bool scalar = t == dods_byte_c
                        || t == dods_int16_c
                        || t == dods_uint16_c
                        || t == dods_int32_c
                        || t == dods_uint32_c
                        || t == dods_float32_c
                        || t == dods_float64_c
                        || t == dods_str_c
                        || t == dods_url_c;

    return scalar ? t : dods_null_c;
}
#endif
219 
/**
 * Test whether two C strings differ.
 *
 * @param name The string being examined (an element name at the call sites).
 * @param tag  The string to compare it with.
 * @return true when the strings are not byte-for-byte equal.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
224 
225 void DDXParser::set_state(DDXParser::ParseState state)
226 {
227  s.push(state);
228 }
229 
230 DDXParser::ParseState DDXParser::get_state() const
231 {
232  return s.top();
233 }
234 
235 void DDXParser::pop_state()
236 {
237  s.pop();
238 }
239 
243 void DDXParser::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
244 {
245  if (!attribute_table.empty())
246  attribute_table.clear(); // erase old attributes
247 
248  unsigned int index = 0;
249  for (int i = 0; i < nb_attributes; ++i, index += 5) {
250  // Make a value using the attribute name and the prefix, namespace URI
251  // and the value. The prefix might be null.
252  attribute_table.insert(map<string, XMLAttribute>::value_type(
253  string((const char *)attributes[index]),
254  XMLAttribute(attributes + index + 1)));
255 
256  DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
257  << attribute_table[(const char *)attributes[index]].value << endl);
258  }
259 }
260 
261 void DDXParser::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
262 {
263  for (int i = 0; i < nb_namespaces; ++i ) {
264  // make a value with the prefix and namespace URI. The prefix might be
265  // null.
266  namespace_table.insert(map<string,string>::value_type(
267  namespaces[i*2] != 0 ? (const char *)namespaces[i*2] : "",
268  (const char *)namespaces[i*2+1]));
269  }
270 }
271 
276 bool DDXParser::check_required_attribute(const string & attr)
277 {
278  map < string, XMLAttribute >::iterator i = attribute_table.find(attr);
279  if (i == attribute_table.end())
280  ddx_fatal_error(this, "Required attribute '%s' not found.",
281  attr.c_str());
282  return true;
283 }
284 
290 bool DDXParser::check_attribute(const string & attr)
291 {
292  return (attribute_table.find(attr) != attribute_table.end());
293 }
294 
303 void DDXParser::process_attribute_element(const xmlChar **attrs, int nb_attributes)
304 {
305  // These methods set the state to parser_error if a problem is found.
306  transfer_xml_attrs(attrs, nb_attributes);
307 
308  bool error = !(check_required_attribute(string("name"))
309  && check_required_attribute(string("type")));
310  if (error)
311  return;
312 
313  if (attribute_table["type"].value == "Container") {
314  set_state(inside_attribute_container);
315 
316  AttrTable *child;
317  AttrTable *parent = at_stack.top();
318 
319  child = parent->append_container(attribute_table["name"].value);
320  at_stack.push(child); // save.
321  DBG2(cerr << "Pushing at" << endl);
322  }
323  else if (attribute_table["type"].value == "OtherXML") {
324  set_state(inside_other_xml_attribute);
325 
326  dods_attr_name = attribute_table["name"].value;
327  dods_attr_type = attribute_table["type"].value;
328  }
329  else {
330  set_state(inside_attribute);
331  // *** Modify parser. Add a special state for inside OtherXML since it
332  // does not use the <value> element.
333 
334  dods_attr_name = attribute_table["name"].value;
335  dods_attr_type = attribute_table["type"].value;
336  }
337 }
338 
342 void DDXParser::process_attribute_alias(const xmlChar **attrs, int nb_attributes)
343 {
344  transfer_xml_attrs(attrs, nb_attributes);
345  if (check_required_attribute(string("name"))
346  && check_required_attribute(string("attribute"))) {
347  set_state(inside_alias);
348  at_stack.top()->attr_alias(attribute_table["name"].value,
349  attribute_table["attribute"].value);
350  }
351 }
352 
360 void DDXParser::process_variable(Type t, ParseState s, const xmlChar **attrs,
361  int nb_attributes)
362 {
363  transfer_xml_attrs(attrs, nb_attributes);
364 
365  set_state(s);
366 
367  if (bt_stack.top()->type() == dods_array_c
368  || check_required_attribute("name")) { // throws on error/false
369  BaseType *btp = factory(t, attribute_table["name"].value);
370  if (!btp)
372  this,
373  "Internal parser error; could not instantiate the variable '%s'.",
374  attribute_table["name"].value.c_str());
375 
376  // Once we make the new variable, we not only load it on to the
377  // BaseType stack, we also load its AttrTable on the AttrTable stack.
378  // The attribute processing software always operates on the AttrTable
379  // at the top of the AttrTable stack (at_stack).
380  bt_stack.push(btp);
381  at_stack.push(&btp->get_attr_table());
382  }
383 }
384 
388 void DDXParser::process_dimension(const xmlChar **attrs, int nb_attributes)
389 {
390  transfer_xml_attrs(attrs, nb_attributes);
391  if (check_required_attribute(string("size"))) {
392  set_state(inside_dimension);
393  Array *ap = dynamic_cast < Array * >(bt_stack.top());
394  if (!ap) {
395  ddx_fatal_error(this, "Parse error: Expected an array variable.");
396  return;
397  }
398 
399  ap->append_dim(atoi(attribute_table["size"].value.c_str()),
400  attribute_table["name"].value);
401  }
402 }
403 
406 void DDXParser::process_blob(const xmlChar **attrs, int nb_attributes)
407 {
408  transfer_xml_attrs(attrs, nb_attributes);
409  if (check_required_attribute(string("href"))) {
410  set_state(inside_blob_href);
411  *blob_href = attribute_table["href"].value;
412  }
413 }
414 
421 inline bool
422 DDXParser::is_attribute_or_alias(const char *name, const xmlChar **attrs,
423  int nb_attributes)
424 {
425  if (strcmp(name, "Attribute") == 0) {
426  process_attribute_element(attrs, nb_attributes);
427  // next state: inside_attribtue or inside_attribute_container
428  return true;
429  }
430  else if (strcmp(name, "Alias") == 0) {
431  process_attribute_alias(attrs, nb_attributes);
432  // next state: inside_alias
433  return true;
434  }
435 
436  return false;
437 }
438 
444 inline bool DDXParser::is_variable(const char *name, const xmlChar **attrs,
445  int nb_attributes)
446 {
447  Type t = get_type(name);
448  //if ((t = is_simple_type(name)) != dods_null_c) {
449  if (is_simple_type(t)) {
450  process_variable(t, inside_simple_type, attrs, nb_attributes);
451  return true;
452  }
453  else if (strcmp(name, "Array") == 0) {
454  process_variable(dods_array_c, inside_array, attrs, nb_attributes);
455  return true;
456  }
457  else if (strcmp(name, "Structure") == 0) {
458  process_variable(dods_structure_c, inside_structure, attrs, nb_attributes);
459  return true;
460  }
461  else if (strcmp(name, "Sequence") == 0) {
462  process_variable(dods_sequence_c, inside_sequence, attrs, nb_attributes);
463  return true;
464  }
465  else if (strcmp(name, "Grid") == 0) {
466  process_variable(dods_grid_c, inside_grid, attrs, nb_attributes);
467  return true;
468  }
469 
470  return false;
471 }
472 
473 void DDXParser::finish_variable(const char *tag, Type t, const char *expected)
474 {
475  if (strcmp(tag, expected) != 0) {
477  "Expected an end tag for a %s; found '%s' instead.",
478  expected, tag);
479  return;
480  }
481 
482  pop_state();
483 
484  BaseType *btp = bt_stack.top();
485 
486  bt_stack.pop();
487  at_stack.pop();
488 
489  if (btp->type() != t) {
491  "Internal error: Expected a %s variable.",
492  expected);
493  delete btp;
494  return;
495  }
496  // Once libxml2 validates, this can go away. 05/30/03 jhrg
497  if (t == dods_array_c
498  && static_cast<Array*>(btp)->dimensions() == 0) {
500  "No dimension element included in the Array '%s'.",
501  btp->name().c_str());
502  delete btp;
503  return;
504  }
505 
506  BaseType *parent = bt_stack.top();
507 
508  if (!(parent->is_vector_type() || parent->is_constructor_type())) {
510  "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
511  tag,
512  bt_stack.top()->type_name().c_str(),
513  bt_stack.top()->name().c_str());
514  delete btp;
515  return;
516  }
517 
518  parent->add_var_nocopy(btp);
519 }
520 
527 
533 {
534  DDXParser *parser = static_cast<DDXParser*>(p);
535  parser->error_msg = "";
536  parser->char_data = "";
537 
538  // init attr table stack.
539  parser->at_stack.push(&parser->dds->get_attr_table());
540 
541  // Trick; DDS *should* be a child of Structure. To simplify parsing,
542  // stuff a Structure on the bt_stack and dump the top level variables
543  // there. Once we're done, transfer the variables to the DDS.
544  parser->bt_stack.push(new Structure("dummy_dds"));
545 
546  parser->set_state(parser_start);
547 
548  DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
549 }
550 
554 {
555  DDXParser *parser = static_cast<DDXParser*>(p);
556  DBG2(cerr << "Ending state == " << states[parser->get_state()] <<
557  endl);
558 
559  if (parser->get_state() != parser_start)
560  DDXParser::ddx_fatal_error(parser, "The document contained unbalanced tags.");
561 
562  // If we've found any sort of error, don't make the DDX; intern() will
563  // take care of the error.
564  if (parser->get_state() == parser_error) {
565  return;
566  }
567 
568  // Pop the temporary Structure off the stack and transfer its variables
569  // to the DDS.
570  Constructor *cp = dynamic_cast < Constructor * >(parser->bt_stack.top());
571  if (!cp) {
572  delete parser->bt_stack.top();
573  parser->bt_stack.pop();
574  ddx_fatal_error(parser, "Parse error: Expected a Structure, Sequence or Grid variable.");
575  return;
576  }
577 
578  for (Constructor::Vars_iter i = cp->var_begin(); i != cp->var_end(); ++i) {
579  (*i)->set_parent(0); // top-level vars have no parents
580  parser->dds->add_var(*i);
581  }
582 
583  delete parser->bt_stack.top();
584  parser->bt_stack.pop();
585 }
586 
588  const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
589  int nb_namespaces, const xmlChar **namespaces,
590  int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
591 {
592  DDXParser *parser = static_cast<DDXParser*>(p);
593  const char *localname = (const char *)l;
594 
595  DBG2(cerr << "start element: " << localname << ", states: "
596  << states[parser->get_state()]);
597 
598  switch (parser->get_state()) {
599  case parser_start:
600  if (strcmp(localname, "Dataset") == 0) {
601  parser->set_state(inside_dataset);
602  parser->root_ns = URI != 0 ? (const char *)URI: "";
603  parser->transfer_xml_attrs(attributes, nb_attributes);
604 
605  if (parser->check_required_attribute(string("name")))
606  parser->dds->set_dataset_name(parser->attribute_table["name"].value);
607 
608  if (parser->check_attribute("dapVersion"))
609  parser->dds->set_dap_version(parser->attribute_table["dapVersion"].value);
610  }
611  else
613  "Expected response to start with a Dataset element; found '%s' instead.",
614  localname);
615  break;
616 
617  case inside_dataset:
618  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
619  break;
620  else if (parser->is_variable(localname, attributes, nb_attributes))
621  break;
622  else if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0) {
623  parser->process_blob(attributes, nb_attributes);
624  // next state: inside_data_blob
625  }
626  else
628  "Expected an Attribute, Alias or variable element; found '%s' instead.",
629  localname);
630  break;
631 
632  case inside_attribute_container:
633  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
634  break;
635  else
637  "Expected an Attribute or Alias element; found '%s' instead.",
638  localname);
639  break;
640 
641  case inside_attribute:
642  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
643  break;
644  else if (strcmp(localname, "value") == 0)
645  parser->set_state(inside_attribute_value);
646  else
647  ddx_fatal_error(parser,
648  "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
649  localname);
650  break;
651 
652  case inside_attribute_value:
653  ddx_fatal_error(parser,
654  "Internal parser error; unexpected state, inside value while processing element '%s'.",
655  localname);
656  break;
657 
658  case inside_other_xml_attribute:
659  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
660 
661  parser->other_xml_depth++;
662 
663  // Accumulate the elements here
664 
665  parser->other_xml.append("<");
666  if (prefix) {
667  parser->other_xml.append((const char *)prefix);
668  parser->other_xml.append(":");
669  }
670  parser->other_xml.append(localname);
671 
672  if (nb_namespaces != 0) {
673  parser->transfer_xml_ns(namespaces, nb_namespaces);
674 
675  for (map<string,string>::iterator i = parser->namespace_table.begin();
676  i != parser->namespace_table.end();
677  ++i) {
678  parser->other_xml.append(" xmlns");
679  if (!i->first.empty()) {
680  parser->other_xml.append(":");
681  parser->other_xml.append(i->first);
682  }
683  parser->other_xml.append("=\"");
684  parser->other_xml.append(i->second);
685  parser->other_xml.append("\"");
686  }
687  }
688 
689  if (nb_attributes != 0) {
690  parser->transfer_xml_attrs(attributes, nb_attributes);
691  for (XMLAttrMap::iterator i = parser->attr_table_begin();
692  i != parser->attr_table_end();
693  ++i) {
694  parser->other_xml.append(" ");
695  if (!i->second.prefix.empty()) {
696  parser->other_xml.append(i->second.prefix);
697  parser->other_xml.append(":");
698  }
699  parser->other_xml.append(i->first);
700  parser->other_xml.append("=\"");
701  parser->other_xml.append(i->second.value);
702  parser->other_xml.append("\"");
703  }
704  }
705 
706  parser->other_xml.append(">");
707  break;
708 
709  case inside_alias:
710  ddx_fatal_error(parser,
711  "Internal parser error; unexpected state, inside alias while processing element '%s'.",
712  localname);
713  break;
714 
715  case inside_simple_type:
716  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
717  break;
718  else
719  ddx_fatal_error(parser,
720  "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
721  localname);
722  break;
723 
724  case inside_array:
725  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
726  break;
727  else if (is_not(localname, "Array")
728  && parser->is_variable(localname, attributes, nb_attributes))
729  break;
730  else if (strcmp(localname, "dimension") == 0) {
731  parser->process_dimension(attributes, nb_attributes);
732  // next state: inside_dimension
733  }
734  else
735  ddx_fatal_error(parser,
736  "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
737  localname);
738  break;
739 
740  case inside_dimension:
741  ddx_fatal_error(parser,
742  "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
743  localname);
744  break;
745 
746  case inside_structure:
747  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
748  break;
749  else if (parser->is_variable(localname, attributes, nb_attributes))
750  break;
751  else
753  "Expected an Attribute, Alias or variable element; found '%s' instead.",
754  localname);
755  break;
756 
757  case inside_sequence:
758  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
759  break;
760  else if (parser->is_variable(localname, attributes, nb_attributes))
761  break;
762  else
764  "Expected an Attribute, Alias or variable element; found '%s' instead.",
765  localname);
766  break;
767 
768  case inside_grid:
769  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
770  break;
771  else if (strcmp(localname, "Array") == 0)
772  parser->process_variable(dods_array_c, inside_array, attributes, nb_attributes);
773  else if (strcmp(localname, "Map") == 0)
774  parser->process_variable(dods_array_c, inside_map, attributes, nb_attributes);
775  else
777  "Expected an Attribute, Alias or variable element; found '%s' instead.",
778  localname);
779  break;
780 
781  case inside_map:
782  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
783  break;
784  else if (is_not(localname, "Array") && is_not(localname, "Sequence")
785  && is_not(localname, "Grid")
786  && parser->is_variable(localname, attributes, nb_attributes))
787  break;
788  else if (strcmp(localname, "dimension") == 0) {
789  parser->process_dimension(attributes, nb_attributes);
790  // next state: inside_dimension
791  }
792  else
793  ddx_fatal_error(parser,
794  "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
795  localname);
796  break;
797 
798  case inside_blob_href:
799  ddx_fatal_error(parser,
800  "Internal parser error; unexpected state, inside blob href while processing element '%s'.",
801  localname);
802  break;
803 
804  case parser_unknown:
805  // *** Never used? If so remove/error
806  parser->set_state(parser_unknown);
807  break;
808 
809  case parser_error:
810  break;
811  }
812 
813  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
814 }
815 
816 void DDXParser::ddx_sax2_end_element(void *p, const xmlChar *l,
817  const xmlChar *prefix, const xmlChar *URI)
818 {
819  DDXParser *parser = static_cast<DDXParser*>(p);
820  const char *localname = (const char *)l;
821 
822  DBG2(cerr << "End element " << localname << " (state "
823  << states[parser->get_state()] << ")" << endl);
824 
825  switch (parser->get_state()) {
826  case parser_start:
827  ddx_fatal_error(parser,
828  "Internal parser error; unexpected state, inside start state while processing element '%s'.",
829  localname);
830  break;
831 
832  case inside_dataset:
833  if (strcmp(localname, "Dataset") == 0)
834  parser->pop_state();
835  else
837  "Expected an end Dataset tag; found '%s' instead.",
838  localname);
839  break;
840 
841  case inside_attribute_container:
842  if (strcmp(localname, "Attribute") == 0) {
843  parser->pop_state();
844  parser->at_stack.pop(); // pop when leaving a container.
845  }
846  else
848  "Expected an end Attribute tag; found '%s' instead.",
849  localname);
850  break;
851 
852  case inside_attribute:
853  if (strcmp(localname, "Attribute") == 0)
854  parser->pop_state();
855  else
857  "Expected an end Attribute tag; found '%s' instead.",
858  localname);
859  break;
860 
861  case inside_attribute_value:
862  if (strcmp(localname, "value") == 0) {
863  parser->pop_state();
864  AttrTable *atp = parser->at_stack.top();
865  atp->append_attr(parser->dods_attr_name,
866  parser->dods_attr_type, parser->char_data);
867  parser->char_data = ""; // Null this after use.
868  }
869  else
871  "Expected an end value tag; found '%s' instead.",
872  localname);
873 
874  break;
875 
876  case inside_other_xml_attribute: {
877  if (strcmp(localname, "Attribute") == 0
878  && parser->root_ns == (const char *)URI) {
879 
880  DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
881  << endl);
882 
883  parser->pop_state();
884 
885  AttrTable *atp = parser->at_stack.top();
886  atp->append_attr(parser->dods_attr_name,
887  parser->dods_attr_type, parser->other_xml);
888 
889  parser->other_xml = ""; // Null this after use.
890  }
891  else {
892  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
893  << ", depth: " << parser->other_xml_depth << endl);
894  if (parser->other_xml_depth == 0)
896  "Expected an OtherXML attribute to end! Instead I found '%s'",
897  localname);
898  parser->other_xml_depth--;
899 
900  parser->other_xml.append("</");
901  if (prefix) {
902  parser->other_xml.append((const char *)prefix);
903  parser->other_xml.append(":");
904  }
905  parser->other_xml.append(localname);
906  parser->other_xml.append(">");
907  }
908  break;
909  }
910  // Alias is busted in libdap++ 05/29/03 jhrg
911  case inside_alias:
912  parser->pop_state();
913  break;
914 
915  case inside_simple_type: {
916  Type t = get_type(localname);
917  if (is_simple_type(t)) {
918  parser->pop_state();
919  BaseType *btp = parser->bt_stack.top();
920  parser->bt_stack.pop();
921  parser->at_stack.pop();
922 
923  BaseType *parent = parser->bt_stack.top();
924 
925  if (parent->is_vector_type() || parent->is_constructor_type()) {
926  parent->add_var(btp);
927  delete btp;
928  }
929  else {
931  "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).",
932  localname,
933  parser->bt_stack.top()->
934  type_name().c_str(),
935  parser->bt_stack.top()->name().
936  c_str());
937  delete btp;
938  }
939  }
940  else {
942  "Expected an end tag for a simple type; found '%s' instead.",
943  localname);
944  }
945  break;
946  }
947 
948  case inside_array:
949  parser->finish_variable(localname, dods_array_c, "Array");
950  break;
951 
952  case inside_dimension:
953  if (strcmp(localname, "dimension") == 0)
954  parser->pop_state();
955  else
957  "Expected an end dimension tag; found '%s' instead.",
958  localname);
959  break;
960 
961  case inside_structure:
962  parser->finish_variable(localname, dods_structure_c, "Structure");
963  break;
964 
965  case inside_sequence:
966  parser->finish_variable(localname, dods_sequence_c, "Sequence");
967  break;
968 
969  case inside_grid:
970  parser->finish_variable(localname, dods_grid_c, "Grid");
971  break;
972 
973  case inside_map:
974  parser->finish_variable(localname, dods_array_c, "Map");
975  break;
976 
977  case inside_blob_href:
978  if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0)
979  parser->pop_state();
980  else
982  "Expected an end dataBLOB/blob tag; found '%s' instead.",
983  localname);
984  break;
985 
986  case parser_unknown:
987  parser->pop_state();
988  break;
989 
990  case parser_error:
991  break;
992  }
993 
994 
995  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
996 }
997 
1001 void DDXParser::ddx_get_characters(void * p, const xmlChar * ch, int len)
1002 {
1003  DDXParser *parser = static_cast<DDXParser*>(p);
1004 
1005  switch (parser->get_state()) {
1006  case inside_attribute_value:
1007  parser->char_data.append((const char *)(ch), len);
1008  DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
1009  break;
1010 
1011  case inside_other_xml_attribute:
1012  parser->other_xml.append((const char *)(ch), len);
1013  DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1014  break;
1015 
1016  default:
1017  break;
1018  }
1019 }
1020 
1025 void DDXParser::ddx_ignoreable_whitespace(void *p, const xmlChar *ch,
1026  int len)
1027 {
1028  DDXParser *parser = static_cast<DDXParser*>(p);
1029 
1030  switch (parser->get_state()) {
1031  case inside_other_xml_attribute:
1032  parser->other_xml.append((const char *)(ch), len);
1033  break;
1034 
1035  default:
1036  break;
1037  }
1038 }
1039 
1045 void DDXParser::ddx_get_cdata(void *p, const xmlChar *value, int len)
1046 {
1047  DDXParser *parser = static_cast<DDXParser*>(p);
1048 
1049  switch (parser->get_state()) {
1050  case inside_other_xml_attribute:
1051  parser->other_xml.append((const char *)(value), len);
1052  break;
1053 
1054  case parser_unknown:
1055  break;
1056 
1057  default:
1059  "Found a CData block but none are allowed by DAP.");
1060 
1061  break;
1062  }
1063 }
1064 
1069 xmlEntityPtr DDXParser::ddx_get_entity(void *, const xmlChar * name)
1070 {
1071  return xmlGetPredefinedEntity(name);
1072 }
1073 
1081 void DDXParser::ddx_fatal_error(void * p, const char *msg, ...)
1082 {
1083  va_list args;
1084  DDXParser *parser = static_cast<DDXParser*>(p);
1085 
1086  parser->set_state(parser_error);
1087 
1088  va_start(args, msg);
1089  char str[1024];
1090  vsnprintf(str, 1024, msg, args);
1091  va_end(args);
1092 
1093  int line = xmlSAX2GetLineNumber(parser->ctxt);
1094 
1095  parser->error_msg += "At line " + long_to_string(line) + ": ";
1096  parser->error_msg += string(str) + string("\n");
1097 }
1098 
1100 
1101 void DDXParser::cleanup_parse(xmlParserCtxtPtr & context)
1102 {
1103  bool wellFormed = context->wellFormed;
1104  bool valid = context->valid;
1105 
1106  context->sax = NULL;
1107  xmlFreeParserCtxt(context);
1108 
1109  // If there's an error, there may still be items on the stack at the
1110  // end of the parse.
1111  while (!bt_stack.empty()) {
1112  delete bt_stack.top();
1113  bt_stack.pop();
1114  }
1115 
1116  if (!wellFormed) {
1117  throw DDXParseFailed(string("\nThe DDX is not a well formed XML document.\n") + error_msg);
1118  }
1119 
1120  if (!valid) {
1121  throw DDXParseFailed(string("\nThe DDX is not a valid document.\n") + error_msg);
1122  }
1123 
1124  if (get_state() == parser_error) {
1125  throw DDXParseFailed(string("\nError parsing DDX response.\n") + error_msg);
1126  }
1127 }
1128 
1136 void DDXParser::intern_stream(istream &in, DDS *dest_dds, string &cid, const string &boundary)
1137 {
1138  // Code example from libxml2 docs re: read from a stream.
1139  if (!in || in.eof())
1140  throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1141 
1142  const int size = 1024;
1143  char chars[size + 1];
1144 
1145  // int res = fread(chars, 1, 4, in);
1146  in.readsome(chars, 4);
1147  int res = in.gcount();
1148  if (res > 0) {
1149  chars[4]='\0';
1150  xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1151 
1152  ctxt = context; // need ctxt for error messages
1153  dds = dest_dds; // dump values here
1154  blob_href = &cid; // cid goes here
1155 
1156  xmlSAXHandler ddx_sax_parser;
1157  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1158 
1159  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1160  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1161  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1162  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1163  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1164  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1165  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1166  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1167  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1168  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1169  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1170  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1171 
1172  context->sax = &ddx_sax_parser;
1173  context->userData = this;
1174  context->validate = true;
1175 
1176  in.getline(chars, size); // chars has size+1 elements
1177  res = in.gcount();
1178  chars[res-1] = '\n'; // libxml needs the newline; w/o it the parse will fail
1179  chars[res] = '\0';
1180  while (res > 0 && !is_boundary(chars, boundary)) {
1181  DBG(cerr << "line (" << res << "): " << chars << endl);
1182  xmlParseChunk(ctxt, chars, res, 0);
1183 
1184  in.getline(chars, size); // chars has size+1 elements
1185  res = in.gcount();
1186  if (res > 0) {
1187  chars[res-1] = '\n';
1188  chars[res] = '\0';
1189  }
1190  }
1191 
1192  // This call ends the parse: The fourth argument of xmlParseChunk is
1193  // the bool 'terminate.'
1194  xmlParseChunk(ctxt, chars, 0, 1);
1195 
1196  cleanup_parse(context);
1197  }
1198 }
1199 
1202 void DDXParser::intern_stream(FILE *in, DDS *dest_dds, string &cid, const string &boundary)
1203 {
1204  // Code example from libxml2 docs re: read from a stream.
1205  if (!in || feof(in) || ferror(in))
1206  throw InternalErr(__FILE__, __LINE__,
1207  "Input stream not open or read error");
1208 
1209  const int size = 1024;
1210  char chars[size];
1211 
1212  int res = fread(chars, 1, 4, in);
1213  if (res > 0) {
1214  chars[4]='\0';
1215  xmlParserCtxtPtr context =
1216  xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1217 
1218  ctxt = context; // need ctxt for error messages
1219  dds = dest_dds; // dump values here
1220  blob_href = &cid; // cid goes here
1221 
1222  xmlSAXHandler ddx_sax_parser;
1223  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1224 
1225  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1226  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1227  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1228  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1229  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1230  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1231  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1232  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1233  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1234  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1235  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1236  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1237 
1238  context->sax = &ddx_sax_parser;
1239  context->userData = this;
1240  context->validate = true;
1241 
1242 
1243  while ((fgets(chars, size, in) > 0) && !is_boundary(chars, boundary)) {
1244  //chars[size-1] = '\0';
1245  DBG(cerr << "line (" << strlen(chars) << "): " << chars << endl);
1246 
1247  xmlParseChunk(ctxt, chars, strlen(chars), 0);
1248  }
1249  // This call ends the parse: The fourth argument of xmlParseChunk is
1250  // the bool 'terminate.'
1251  xmlParseChunk(ctxt, chars, 0, 1);
1252 
1253  cleanup_parse(context);
1254  }
1255 }
1256 
1257 
1269 void DDXParser::intern(const string & document, DDS * dest_dds, string &cid)
1270 {
1271  // Create the context pointer explicitly so that we can store a pointer
1272  // to it in the DDXParser instance. This provides a way to generate our
1273  // own error messages *with* line numbers. The messages are pretty
1274  // meaningless otherwise. This means that we use an interface from the
1275  // 'parser internals' header, and not the 'parser' header. However, this
1276  // interface is also used in one of the documented examples, so it's
1277  // probably pretty stable. 06/02/03 jhrg
1278  xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
1279  if (!context)
1280  throw
1281  DDXParseFailed(string
1282  ("Could not initialize the parser with the file: '")
1283  + document + string("'."));
1284 
1285  dds = dest_dds; // dump values here
1286  blob_href = &cid;
1287  ctxt = context; // need ctxt for error messages
1288 
1289  xmlSAXHandler ddx_sax_parser;
1290  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1291 
1292  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1293  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1294  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1295  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1296  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1297  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1298  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1299  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1300  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1301  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1302  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1303  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1304 
1305  context->sax = &ddx_sax_parser;
1306  context->userData = this;
1307  context->validate = false;
1308 
1309  xmlParseDocument(context);
1310 
1311  cleanup_parse(context);
1312 }
1313 
1314 } // namespace libdap
void intern_stream(FILE *in, DDS *dds, string &cid, const string &boundary="")
Read the DDX from a stream instead of a file.
Contains the attributes for a dataset.
Definition: AttrTable.h:142
virtual Structure * NewStructure(const string &n="") const
virtual Sequence * NewSequence(const string &n="") const
std::vector< BaseType * >::iterator Vars_iter
Definition: Constructor.h:62
#define DBGN(x)
Definition: debug.h:59
virtual Byte * NewByte(const string &n="") const
virtual Str * NewStr(const string &n="") const
static void ddx_start_document(void *parser)
static void ddx_get_cdata(void *parser, const xmlChar *value, int len)
Holds a structure (aggregate) type.
Definition: Structure.h:83
virtual void add_var(BaseType *bt, Part part=nil)
Add a variable.
Definition: BaseType.cc:721
Type
Identifies the data type.
Definition: Type.h:94
virtual Url * NewUrl(const string &n="") const
#define DBG2(x)
Definition: debug.h:73
A class for software fault reporting.
Definition: InternalErr.h:64
#define DBG(x)
Definition: debug.h:58
virtual bool is_constructor_type() const
Returns true if the instance is a constructor (i.e., Structure, Sequence or Grid) type variable...
Definition: BaseType.cc:353
static void ddx_fatal_error(void *parser, const char *msg,...)
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
virtual Float32 * NewFloat32(const string &n="") const
virtual bool is_vector_type() const
Returns true if the instance is a vector (i.e., array) type variable.
Definition: BaseType.cc:343
virtual Float64 * NewFloat64(const string &n="") const
static void ddx_end_document(void *parser)
static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name)
static void ddx_get_characters(void *parser, const xmlChar *ch, int len)
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:815
virtual AttrTable & get_attr_table()
Definition: DDS.cc:374
static void ddx_sax2_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
static void ddx_sax2_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
virtual Array * NewArray(const string &n="", BaseType *v=0) const
string long_to_string(long val, int base)
Definition: util.cc:1012
bool is_boundary(const char *line, const string &boundary)
Definition: mime_util.cc:927
virtual Grid * NewGrid(const string &n="") const
virtual Int16 * NewInt16(const string &n="") const
virtual unsigned int append_attr(const string &name, const string &type, const string &value)
Add an attribute to the table.
Definition: AttrTable.cc:306
void set_dataset_name(const string &n)
Definition: DDS.cc:365
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
virtual Int32 * NewInt32(const string &n="") const
Vars_iter var_begin()
Definition: Constructor.cc:331
virtual UInt16 * NewUInt16(const string &n="") const
static void ddx_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
Vars_iter var_end()
Definition: Constructor.cc:339
void intern(const string &document, DDS *dest_dds, string &cid)
string type_name(Type t)
Definition: util.cc:799
virtual UInt32 * NewUInt32(const string &n="") const
void set_dap_version(const string &version_string="2.0")
Definition: DDS.cc:442
void add_var(BaseType *bt)
Adds a copy of the variable to the DDS. Using the ptr_duplicate() method, perform a deep copy on the ...
Definition: DDS.cc:588