Object
ruby constants for strings (should this be moved somewhere else?)
How the parser advances to the next token.
@return true if not at end of file (EOF).
# File lib/ruby_lexer.rb, line 53
#
# Fetches the next token from yylex and stores it in +token+.
# Raises if yylex returns nil.
# Returns true unless the fetched token is RubyLexer::EOF.
def advance
  tok = yylex
  self.token = tok

  raise "yylex returned nil" unless tok

  RubyLexer::EOF != tok
end
# File lib/ruby_lexer.rb, line 62
#
# Emits the "ambiguous first argument" warning through #warning.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
# File lib/ruby_lexer.rb, line 72
#
# Pushes false onto both the cond and cmdarg stacks, switches the lexer
# to :expr_beg and records +val+ as the current yacc_value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }

  self.lex_state  = :expr_beg
  self.yacc_value = val
end
# File lib/ruby_lexer.rb, line 79
#
# Moves the lexer to :expr_arg when it is currently in :expr_fname or
# :expr_dot, and to :expr_beg in every other state.
def fix_arg_lex_state
  after_name = [:expr_fname, :expr_dot].include?(lex_state)

  self.lex_state = after_name ? :expr_arg : :expr_beg
end
# File lib/ruby_lexer.rb, line 87
#
# Lexes the next piece of a here document.
#
# +here+ is a lex_strterm tuple of the form
# [:heredoc, eos, func, last_line], where +eos+ is the terminator text,
# +func+ the STR_FUNC_* bit flags and +last_line+ the remainder of the
# line on which the heredoc was introduced.
#
# Returns :tSTRING_END when the terminator line is reached, otherwise
# :tSTRING_CONTENT, :tSTRING_DVAR or :tSTRING_DBEG, with yacc_value set
# to the matching text. Reports a compile error (via rb_compile_error)
# when the input ends before the terminator is found.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # The terminator is literal text: escape it so that characters such as
  # "." or "+" in a quoted terminator are not treated as regexp syntax.
  eos_pat = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_pat}(\r?\n|\z)/ : /#{eos_pat}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if
    src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        # Stopped in the middle of a line (e.g. at an interpolation):
        # hand back what has been collected so far.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if
        src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-expanding heredoc: copy whole lines verbatim up to the terminator.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if
        src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 152
#
# Lexes the identifier that follows "<<" when it introduces a here
# document: an optional "-", then either a quoted terminator
# ('..', ".." or `..`) or a bare word.
#
# On success the rest of the current line is cut out of the scanner's
# string (replaced by a single newline) and remembered, lex_strterm is
# set to [:heredoc, terminator, func, rest_of_line], and :tXSTRING_BEG
# (backtick form) or :tSTRING_BEG is returned.
# Returns nil when what follows is not a heredoc identifier.
# Reports a compile error for an unterminated quoted identifier.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then # quoted terminator; \2 = same quote
    term = src[2]
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then # bare word behaves like a "..." terminator
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
# File lib/ruby_lexer.rb, line 213
#
# Converts the text last matched by the scanner into an Integer using
# radix +base+, stores it in yacc_value and returns :tINTEGER.
# Reports a compile error if the matched text contains "__".
def int_with_base base
  digits = src.matched

  rb_compile_error "Invalid numeric format" if digits =~ /__/

  self.yacc_value = digits.to_i(base)
  :tINTEGER
end
# File lib/ruby_lexer.rb, line 219
#
# Sets the lexer state. Only Symbols are accepted; any other value
# raises a RuntimeError naming the rejected value.
def lex_state= o
  raise "lex_state must be a Symbol, got #{o.inspect}" unless Symbol === o
  @lex_state = o
end
# File lib/ruby_lexer.rb, line 225
#
# Returns the cached line number, asking the scanner for it (and caching
# the answer) when no value is cached.
def lineno
  @lineno = src.lineno unless @lineno
  @lineno
end
Parse a number from the input stream.
@param c The first character of the number. @return An int constant which represents a token.
# File lib/ruby_lexer.rb, line 235
#
# Lexes a numeric literal at the current scanner position and moves the
# lexer to :expr_end.
#
# Integer literals are converted by int_with_base and that call's result
# is returned. For a float literal, yacc_value is set to the Float and
# :tFLOAT is returned. Malformed literals are reported through
# rb_compile_error.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/) then
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then # /i: accept upper-case hex digits
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  # /i: accept an upper-case exponent marker (1E3)
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
# File lib/ruby_lexer.rb, line 269
#
# Lexes the opening of a %-literal; the leading "%" has already been
# consumed by the caller.
#
# Sets lex_strterm to [:strterm, string_type, closing_char, opening_char]
# (opening_char is "\0" when the delimiter is not one of the paired
# brackets), sets yacc_value to the literal's opening text and returns
# the matching *_BEG token type. Reports a compile error for an unknown
# literal type or when the input ends inside the opener.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  # /i so that the upper-case type letters (%Q, %W) are read as a type
  # and can reach their own branches below.
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
# File lib/ruby_lexer.rb, line 318
#
# Lexes the next piece of a string-like literal described by +quote+, a
# lex_strterm tuple [:strterm, string_type, term, open].
#
# Returns :tSTRING_END / :tREGEXP_END at the closing delimiter, :tSPACE
# between the elements of a word list, :tSTRING_DVAR / :tSTRING_DBEG at
# the start of an interpolation, and otherwise :tSTRING_CONTENT with the
# collected text in yacc_value.
def parse_string(quote) # 65 lines
  _, func, term, paren = quote

  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  unless func then # FIX: impossible, prolly needs == 0
    self.lineno = nil
    return :tSTRING_END
  end

  # In a word list, whitespace separates elements.
  space = awords && !!src.scan(/\s+/)

  if self.nest == 0 && src.scan(Regexp.new(term_re)) then
    if awords then
      quote[1] = nil
      return :tSPACE
    end

    self.yacc_value = regexp ? self.regx_options : term
    self.lineno = nil
    return regexp ? :tREGEXP_END : :tSTRING_END
  end

  return :tSPACE if space

  self.string_buffer = []

  if expand
    if src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    elsif src.scan(/#[{]/) then
      return :tSTRING_DBEG
    elsif src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 1232
#
# Classifies the identifier-like text held in +token+ (global, class or
# instance variable, method name ending in ! or ?, constant, identifier,
# or reserved word), updates lex_state and yacc_value accordingly and
# returns the token type. +command_state+ selects :expr_cmdarg over
# :expr_arg as the follow-up state for plain identifiers.
def process_token(command_state)

  # A trailing ! or ? (not followed by =) belongs to the name.
  token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)

  result     = nil
  last_state = lex_state

  case token
  when /^\$/ then
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/ then
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/ then
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/ then
      result = :tFID
    else
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      if lex_state == :expr_fname && src.scan(/=(?:(?![~>=])|(?==>))/) then
        result = :tIDENTIFIER
        token << src.matched
      end

      result ||= token =~ /^[A-Z]/ ? :tCONSTANT : :tIDENTIFIER
    end

    # See if it is a reserved word (never looked up right after a dot).
    keyword = lex_state == :expr_dot ? nil : Keyword.keyword(token)

    if keyword then
      state           = lex_state
      self.lex_state  = keyword.state
      self.yacc_value = token

      if keyword.id0 == :kDO then
        self.command_start = true
        return :kDO_COND  if cond.is_in_state
        return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
        return :kDO_BLOCK if state == :expr_endarg
        return :kDO
      end

      return keyword.id0 if state == :expr_beg

      self.lex_state = :expr_beg if keyword.id0 != keyword.id1

      return keyword.id1
    end

    arg_states = [:expr_beg, :expr_mid, :expr_dot, :expr_arg, :expr_cmdarg]

    self.lex_state = if arg_states.include?(lex_state) then
                       command_state ? :expr_cmdarg : :expr_arg
                     else
                       :expr_end
                     end
  end

  self.yacc_value = token

  # A known local variable (not reached through a dot) ends the expression.
  if last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar then
    self.lex_state = :expr_end
  end

  result
end
# File lib/ruby_lexer.rb, line 379
#
# Raises a SyntaxError whose message is +msg+ followed by the current
# line number and the unread remainder of the current line.
def rb_compile_error msg
  rest_of_line = src.rest[/^.*/].inspect
  location     = ". near line #{self.lineno}: #{rest_of_line}"

  raise SyntaxError, msg + location
end
# File lib/ruby_lexer.rb, line 384
#
# Reads one escape sequence from the scanner (the leading backslash has
# already been consumed by the caller) and returns the string it stands
# for. Handles the single-letter escapes, octal (\NNN) and hex (\xNN)
# constants and the meta/control forms (\M-x, \C-x, \cx). Any other
# character is returned unchanged. Reports a compile error for a
# malformed sequence or when the input ends right after the backslash.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab (octal 013)
    "\v"
  when src.scan(/a/) then                   # alarm(bell) (octal 007)
    "\a"
  when src.scan(/e/) then                   # escape (octal 033)
    "\e"
  when src.scan(/b/) then                   # backspace (octal 010)
    "\b"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then        # meta applied to a nested escape
    src.scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then               # meta: set the high bit
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then    # control applied to a nested escape
    src.scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then            # delete
    127.chr
  when src.scan(/(C-|c)(.)/) then           # control: clear bits 5 and 6
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
# File lib/ruby_lexer.rb, line 437
#
# Scans the lower-case option letters that follow a regexp literal and
# returns the recognised ones (i, x, m, o, n, e, s, u) joined into a
# string. Reports a compile error listing any unrecognised letters.
def regx_options # 15 lines
  known, unknown = [], []

  if src.scan(/[a-z]+/) then
    known, unknown = src.matched.split(//).partition { |ch| ch =~ /^[ixmonesu]$/ }
  end

  unless unknown.empty? then
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, unknown.join.inspect])
  end

  known.join
end
# File lib/ruby_lexer.rb, line 452
#
# Puts the lexer back into its initial condition: command_start is set,
# the pending string terminator, token and yacc_value are cleared, and
# the scanner and lex state are dropped.
def reset
  self.command_start = true

  self.lex_strterm = nil
  self.token       = nil
  self.yacc_value  = nil

  # Written directly to the instance variables, bypassing the setters.
  @src = @lex_state = nil
end
# File lib/ruby_lexer.rb, line 462
#
# Installs new source text. +src+ must be a String (anything else
# raises); it is wrapped in an RPStringScanner.
def src= src
  raise "bad src: #{src.inspect}" unless src.is_a?(String)

  @src = RPStringScanner.new(src)
end
# File lib/ruby_lexer.rb, line 467
#
# Copies one backslash escape from the scanner into string_buffer
# verbatim (backslash included). A backslash-newline pair is dropped.
# Reports a compile error for a malformed escape. +term+ is only passed
# along to the recursive call.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    # meta/control prefix followed by another escape: keep going
    self.string_buffer << src.matched
    self.tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/) then
    self.string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
# File lib/ruby_lexer.rb, line 487
#
# Appends string content from the scanner to string_buffer until the
# terminator +term+ is reached at nesting level 0, until whitespace in a
# word list, until the start of an interpolation, or until the input
# ends.
#
# +func+  - STR_FUNC_* bit flags describing the literal.
# +term+  - closing delimiter.
# +paren+ - opening delimiter (nil, or "\0" when the delimiter does not
#           nest); occurrences of it raise the nesting level.
#
# Returns RubyLexer::EOF when the input is exhausted, otherwise the last
# piece of text handled (the scanner's last match when the loop was left
# through a break).
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  # Pattern for a run of ordinary characters; it depends only on the
  # arguments, so it is built once rather than on every iteration.
  t = Regexp.escape term
  x = Regexp.escape(paren) if paren && paren != "\000"
  plain_re = if awords then
               /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
             else
               /[^#{t}#{x}\#\0\\]+|./
             end

  c = nil

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      src.scan plain_re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
# File lib/ruby_lexer.rb, line 573
#
# Translates the body of a backslash escape (the text after the
# backslash) into the string it stands for.
#
# +s+ is e.g. "n", "101", "x41", "M-a" or "C-a". Single-letter escapes
# are looked up in a table; octal, hex, meta and control forms are
# computed. Unrecognised escapes are returned unchanged. Reports a
# compile error for a malformed numeric/meta/control escape.
def unescape s

  r = {
    "a"    => "\a",    # bell (octal 007)
    "b"    => "\b",    # backspace (octal 010)
    "e"    => "\e",    # escape (octal 033)
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\v",    # vertical tab (octal 013)
    "\\"   => '\\',
    "\n"   => "",      # escaped newline is a line continuation
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
# File lib/ruby_lexer.rb, line 609
#
# Warning hook. The message +s+ is currently discarded.
def warning s
  nil # do nothing for now
end
Returns the next token. Also sets yy_val if needed.
@return The type of the next token (RubyLexer::EOF at end of input).
# File lib/ruby_lexer.rb, line 618
#
# Returns the type of the next token and stores its value in yacc_value.
#
# While a string-like literal is open (lex_strterm is set) the work is
# delegated to yylex_string. Otherwise whitespace, comments and
# continuation lines are skipped, then punctuation, operators, literals
# and variables are recognised in the order of the branches below, with
# lex_state deciding between the unary/binary and literal/operator
# readings. Identifier-like text is handed to process_token.
# Returns RubyLexer::EOF at end of input and reports malformed input
# through rb_compile_error.
def yylex # 826 lines

  c = ''
  space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
      space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.unread c # ok

          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          if src.eos? then
            return RubyLexer::EOF
          end
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        if [:expr_beg, :expr_fname,
            :expr_dot, :expr_class].include? lex_state then
          next
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        cond.lexpop
        cmdarg.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.check(/\./) then
        if src.scan(/\.\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = "..."
          return :tDOT3
        elsif src.scan(/\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = ".."
          return :tDOT2
        elsif src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\,/) then
        self.lex_state = :expr_beg
        self.yacc_value = ","
        return :tCOMMA
      elsif src.scan(/\(/) then
        result = :tLPAREN2
        self.command_start = true
        if lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLPAREN
        elsif space_seen then
          if lex_state == :expr_cmdarg then
            result = :tLPAREN_ARG
          elsif lex_state == :expr_arg then
            warning("don't put space before argument parentheses")
            result = :tLPAREN2
          end
        end

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "==="
          return :tEQQ
        elsif src.scan(/\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "=="
          return :tEQ
        elsif src.scan(/\=~/) then
          self.fix_arg_lex_state
          self.yacc_value = "=~"
          return :tMATCH
        elsif src.scan(/\=>/) then
          self.fix_arg_lex_state
          self.yacc_value = "=>"
          return :tASSOC
        elsif src.scan(/\=/) then
          if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
            @comments << '=' << src.matched

            # /m: the embedded document normally spans several lines, so
            # "." has to match newlines up to the closing "=end" line.
            unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
              @comments.clear
              rb_compile_error("embedded document meets end of file")
            end

            @comments << src.matched

            next
          else
            self.fix_arg_lex_state
            self.yacc_value = '='
            return :tEQL
          end
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        # Simple double-quoted string without interpolation: strip the
        # surrounding quotes ([1..-2]) and resolve the escapes.
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?\w*/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if (lex_state == :expr_end || lex_state == :expr_endarg||
            src.check(/\s/)) then
          self.lex_state = :expr_beg
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        result = src.matched

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLBRACK
        elsif lex_state.is_argument && space_seen then
          result = :tLBRACK
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        # Simple single-quoted string: strip the surrounding quotes
        # ([1..-2]) and resolve the two escapes that apply here.
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        result = if lex_state.is_argument || lex_state == :expr_end then
                   :tLCURLY      #  block (primary)
                 elsif lex_state == :expr_endarg then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"

        return result
      elsif src.scan(/[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (lex_state == :expr_beg || lex_state == :expr_mid ||
            (lex_state.is_argument && space_seen && !src.check(/\s/))) then
          if lex_state.is_argument then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(/\!/) then
        if src.scan(/\!\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!="
          return :tNEQ
        elsif src.scan(/\!~/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!~"
          return :tNMATCH
        elsif src.scan(/\!/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!"
          return :tBANG
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          # "<<" may introduce a here document in these states.
          if (! [:expr_end,    :expr_dot,
                 :expr_endarg, :expr_class].include?(lex_state) &&
              (!lex_state.is_argument || space_seen)) then
            tok = self.heredoc_identifier
            if tok then
              return tok
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then
        if lex_state == :expr_end || lex_state == :expr_endarg then
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless lex_state.is_argument then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        # Character literal (?x or ?\n): yields the character's code.
        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end
        self.yacc_value = c[0].ord & 0xff
        return :tINTEGER
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if lex_state.is_argument && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if lex_state == :expr_fname || lex_state == :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        if src.scan(/\n/) then
          # backslash-newline is a line continuation
          self.lineno = nil
          space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen && ! src.check(/\s/) then
          return parse_quote
        end

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      if src.scan(/\W/) then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    self.token = src.matched if self.src.scan(/\w+/)

    return process_token(command_state)
  end
end
# File lib/ruby_lexer.rb, line 1314
#
# Lexes the next piece of the currently open string-like literal,
# dispatching to #heredoc or #parse_string according to lex_strterm.
# When the literal ends (:tSTRING_END or :tREGEXP_END) the cached line
# number and lex_strterm are cleared and the lexer moves to :expr_end.
# Returns the token type produced.
def yylex_string # 23 lines
  strterm = lex_strterm

  token = strterm[0] == :heredoc ? heredoc(strterm) : parse_string(strterm)

  if [:tSTRING_END, :tREGEXP_END].include?(token) then
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  token
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.