• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

ext/psych/parser.c

Go to the documentation of this file.
00001 #include <psych.h>
00002 
00003 VALUE cPsychParser;
00004 VALUE ePsychSyntaxError;
00005 
00006 static ID id_read;
00007 static ID id_empty;
00008 static ID id_start_stream;
00009 static ID id_end_stream;
00010 static ID id_start_document;
00011 static ID id_end_document;
00012 static ID id_alias;
00013 static ID id_scalar;
00014 static ID id_start_sequence;
00015 static ID id_end_sequence;
00016 static ID id_start_mapping;
00017 static ID id_end_mapping;
00018 
/*
 * Associate the string `s` with the encoding index `enc_idx`, then, if
 * `target_enc` is non-NULL, replace `s` with the result of
 * rb_str_export_to_enc(s, target_enc).
 *
 * `s` is assigned to, so it must be an lvalue.  Each argument may be
 * evaluated more than once.
 */
#define PSYCH_TRANSCODE(s, enc_idx, target_enc) \
  do { \
    rb_enc_associate_index((s), (enc_idx)); \
    if ((target_enc)) { \
      (s) = rb_str_export_to_enc((s), (target_enc)); \
    } \
  } while (0)
00025 
00026 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
00027 {
00028     VALUE io = (VALUE)data;
00029     VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
00030 
00031     *read = 0;
00032 
00033     if(! NIL_P(string)) {
00034         void * str = (void *)StringValuePtr(string);
00035         *read = (size_t)RSTRING_LEN(string);
00036         memcpy(buf, str, *read);
00037     }
00038 
00039     return 1;
00040 }
00041 
00042 /*
00043  * call-seq:
00044  *    parser.parse(yaml)
00045  *
00046  * Parse the YAML document contained in +yaml+.  Events will be called on
00047  * the handler set on the parser instance.
00048  *
00049  * See Psych::Parser and Psych::Parser#handler
00050  */
00051 static VALUE parse(VALUE self, VALUE yaml)
00052 {
00053     yaml_parser_t parser;
00054     yaml_event_t event;
00055     int done = 0;
00056 #ifdef HAVE_RUBY_ENCODING_H
00057     int encoding = rb_enc_find_index("ASCII-8BIT");
00058     rb_encoding * internal_enc;
00059 #endif
00060     VALUE handler = rb_iv_get(self, "@handler");
00061 
00062 
00063     yaml_parser_initialize(&parser);
00064 
00065     if(rb_respond_to(yaml, id_read)) {
00066         yaml_parser_set_input(&parser, io_reader, (void *)yaml);
00067     } else {
00068         StringValue(yaml);
00069         yaml_parser_set_input_string(
00070                 &parser,
00071                 (const unsigned char *)RSTRING_PTR(yaml),
00072                 (size_t)RSTRING_LEN(yaml)
00073                 );
00074     }
00075 
00076     while(!done) {
00077         if(!yaml_parser_parse(&parser, &event)) {
00078             size_t line   = parser.mark.line;
00079             size_t column = parser.mark.column;
00080 
00081             yaml_parser_delete(&parser);
00082             rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
00083                     (int)line, (int)column);
00084         }
00085 
00086         switch(event.type) {
00087           case YAML_STREAM_START_EVENT:
00088 
00089 #ifdef HAVE_RUBY_ENCODING_H
00090             switch(event.data.stream_start.encoding) {
00091               case YAML_ANY_ENCODING:
00092                 break;
00093               case YAML_UTF8_ENCODING:
00094                 encoding = rb_enc_find_index("UTF-8");
00095                 break;
00096               case YAML_UTF16LE_ENCODING:
00097                 encoding = rb_enc_find_index("UTF-16LE");
00098                 break;
00099               case YAML_UTF16BE_ENCODING:
00100                 encoding = rb_enc_find_index("UTF-16BE");
00101                 break;
00102               default:
00103                 break;
00104             }
00105             internal_enc = rb_default_internal_encoding();
00106 #endif
00107 
00108             rb_funcall(handler, id_start_stream, 1,
00109                        INT2NUM((long)event.data.stream_start.encoding)
00110                 );
00111             break;
00112           case YAML_DOCUMENT_START_EVENT:
00113             {
00114                 /* Get a list of tag directives (if any) */
00115                 VALUE tag_directives = rb_ary_new();
00116                 /* Grab the document version */
00117                 VALUE version = event.data.document_start.version_directive ?
00118                     rb_ary_new3(
00119                         (long)2,
00120                         INT2NUM((long)event.data.document_start.version_directive->major),
00121                         INT2NUM((long)event.data.document_start.version_directive->minor)
00122                         ) : rb_ary_new();
00123 
00124                 if(event.data.document_start.tag_directives.start) {
00125                     yaml_tag_directive_t *start =
00126                         event.data.document_start.tag_directives.start;
00127                     yaml_tag_directive_t *end =
00128                         event.data.document_start.tag_directives.end;
00129                     for(; start != end; start++) {
00130                         VALUE handle = Qnil;
00131                         VALUE prefix = Qnil;
00132                         if(start->handle) {
00133                             handle = rb_str_new2((const char *)start->handle);
00134 #ifdef HAVE_RUBY_ENCODING_H
00135                             PSYCH_TRANSCODE(handle, encoding, internal_enc);
00136 #endif
00137                         }
00138 
00139                         if(start->prefix) {
00140                             prefix = rb_str_new2((const char *)start->prefix);
00141 #ifdef HAVE_RUBY_ENCODING_H
00142                             PSYCH_TRANSCODE(prefix, encoding, internal_enc);
00143 #endif
00144                         }
00145 
00146                         rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
00147                     }
00148                 }
00149                 rb_funcall(handler, id_start_document, 3,
00150                            version, tag_directives,
00151                            event.data.document_start.implicit == 1 ? Qtrue : Qfalse
00152                     );
00153             }
00154             break;
00155           case YAML_DOCUMENT_END_EVENT:
00156             rb_funcall(handler, id_end_document, 1,
00157                        event.data.document_end.implicit == 1 ? Qtrue : Qfalse
00158                 );
00159             break;
00160           case YAML_ALIAS_EVENT:
00161             {
00162                 VALUE alias = Qnil;
00163                 if(event.data.alias.anchor) {
00164                     alias = rb_str_new2((const char *)event.data.alias.anchor);
00165 #ifdef HAVE_RUBY_ENCODING_H
00166                     PSYCH_TRANSCODE(alias, encoding, internal_enc);
00167 #endif
00168                 }
00169 
00170                 rb_funcall(handler, id_alias, 1, alias);
00171             }
00172             break;
00173           case YAML_SCALAR_EVENT:
00174             {
00175                 VALUE anchor = Qnil;
00176                 VALUE tag = Qnil;
00177                 VALUE plain_implicit, quoted_implicit, style;
00178                 VALUE val = rb_str_new(
00179                     (const char *)event.data.scalar.value,
00180                     (long)event.data.scalar.length
00181                     );
00182 
00183 #ifdef HAVE_RUBY_ENCODING_H
00184                 PSYCH_TRANSCODE(val, encoding, internal_enc);
00185 #endif
00186 
00187                 if(event.data.scalar.anchor) {
00188                     anchor = rb_str_new2((const char *)event.data.scalar.anchor);
00189 #ifdef HAVE_RUBY_ENCODING_H
00190                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00191 #endif
00192                 }
00193 
00194                 if(event.data.scalar.tag) {
00195                     tag = rb_str_new2((const char *)event.data.scalar.tag);
00196 #ifdef HAVE_RUBY_ENCODING_H
00197                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00198 #endif
00199                 }
00200 
00201                 plain_implicit =
00202                     event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
00203 
00204                 quoted_implicit =
00205                     event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
00206 
00207                 style = INT2NUM((long)event.data.scalar.style);
00208 
00209                 rb_funcall(handler, id_scalar, 6,
00210                            val, anchor, tag, plain_implicit, quoted_implicit, style);
00211             }
00212             break;
00213           case YAML_SEQUENCE_START_EVENT:
00214             {
00215                 VALUE anchor = Qnil;
00216                 VALUE tag = Qnil;
00217                 VALUE implicit, style;
00218                 if(event.data.sequence_start.anchor) {
00219                     anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
00220 #ifdef HAVE_RUBY_ENCODING_H
00221                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00222 #endif
00223                 }
00224 
00225                 tag = Qnil;
00226                 if(event.data.sequence_start.tag) {
00227                     tag = rb_str_new2((const char *)event.data.sequence_start.tag);
00228 #ifdef HAVE_RUBY_ENCODING_H
00229                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00230 #endif
00231                 }
00232 
00233                 implicit =
00234                     event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
00235 
00236                 style = INT2NUM((long)event.data.sequence_start.style);
00237 
00238                 rb_funcall(handler, id_start_sequence, 4,
00239                            anchor, tag, implicit, style);
00240             }
00241             break;
00242           case YAML_SEQUENCE_END_EVENT:
00243             rb_funcall(handler, id_end_sequence, 0);
00244             break;
00245           case YAML_MAPPING_START_EVENT:
00246             {
00247                 VALUE anchor = Qnil;
00248                 VALUE tag = Qnil;
00249                 VALUE implicit, style;
00250                 if(event.data.mapping_start.anchor) {
00251                     anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
00252 #ifdef HAVE_RUBY_ENCODING_H
00253                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00254 #endif
00255                 }
00256 
00257                 if(event.data.mapping_start.tag) {
00258                     tag = rb_str_new2((const char *)event.data.mapping_start.tag);
00259 #ifdef HAVE_RUBY_ENCODING_H
00260                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00261 #endif
00262                 }
00263 
00264                 implicit =
00265                     event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
00266 
00267                 style = INT2NUM((long)event.data.mapping_start.style);
00268 
00269                 rb_funcall(handler, id_start_mapping, 4,
00270                            anchor, tag, implicit, style);
00271             }
00272             break;
00273           case YAML_MAPPING_END_EVENT:
00274             rb_funcall(handler, id_end_mapping, 0);
00275             break;
00276           case YAML_NO_EVENT:
00277             rb_funcall(handler, id_empty, 0);
00278             break;
00279           case YAML_STREAM_END_EVENT:
00280             rb_funcall(handler, id_end_stream, 0);
00281             done = 1;
00282             break;
00283         }
00284     }
00285 
00286     return self;
00287 }
00288 
00289 void Init_psych_parser()
00290 {
00291 #if 0
00292     mPsych = rb_define_module("Psych");
00293 #endif
00294 
00295     cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
00296 
00297     /* Any encoding: Let the parser choose the encoding */
00298     rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
00299 
00300     /* UTF-8 Encoding */
00301     rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
00302 
00303     /* UTF-16-LE Encoding with BOM */
00304     rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
00305 
00306     /* UTF-16-BE Encoding with BOM */
00307     rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
00308 
00309     ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
00310 
00311     rb_define_method(cPsychParser, "parse", parse, 1);
00312 
00313     id_read           = rb_intern("read");
00314     id_empty          = rb_intern("empty");
00315     id_start_stream   = rb_intern("start_stream");
00316     id_end_stream     = rb_intern("end_stream");
00317     id_start_document = rb_intern("start_document");
00318     id_end_document   = rb_intern("end_document");
00319     id_alias          = rb_intern("alias");
00320     id_scalar         = rb_intern("scalar");
00321     id_start_sequence = rb_intern("start_sequence");
00322     id_end_sequence   = rb_intern("end_sequence");
00323     id_start_mapping  = rb_intern("start_mapping");
00324     id_end_mapping    = rb_intern("end_mapping");
00325 }
00326 /* vim: set noet sws=4 sw=4: */
00327 

Generated on Wed Sep 8 2010 21:53:51 for Ruby by  doxygen 1.7.1