Parent

RubyLexer

Constants

ESC_RE
EOF
STR_FUNC_BORING

ruby constants for strings (should this be moved somewhere else?)

STR_FUNC_ESCAPE
STR_FUNC_EXPAND
STR_FUNC_REGEXP
STR_FUNC_AWORDS
STR_FUNC_SYMBOL
STR_FUNC_INDENT
STR_SQUOTE
STR_DQUOTE
STR_XQUOTE
STR_REGEXP
STR_SSYM
STR_DSYM

Attributes

command_start[RW]
cmdarg[RW]
cond[RW]
nest[RW]
lex_state[R]

Additional context surrounding tokens that both the lexer and grammar use.

lex_strterm[RW]
parser[RW]
src[R]

Stream of data that yylex examines.

token[RW]

Last token read via yylex.

string_buffer[RW]
yacc_value[RW]

Value of last token which had a value associated with it.

warnings[RW]

What handles warnings

lineno[W]

Public Class Methods

new() click to toggle source
     # File lib/ruby_lexer.rb, line 204
     # Builds a fresh lexer: creates the :cond and :cmdarg StackState
     # trackers, zeroes the nesting counter, starts with an empty list of
     # collected comments, and then calls reset to clear the per-source state.
204:   def initialize
205:     self.cond = StackState.new(:cond)
206:     self.cmdarg = StackState.new(:cmdarg)
207:     self.nest = 0
208:     @comments = []
209: 
210:     reset
211:   end

Public Instance Methods

advance() click to toggle source

How the parser advances to the next token.

@return true if not at end of file (EOF).

    # File lib/ruby_lexer.rb, line 53
    # Runs yylex once and stores its result in token.
    # Raises a RuntimeError when yylex returns nil.
    # Returns true unless the result equals RubyLexer::EOF.
53:   def advance
54:     r = yylex
55:     self.token = r
56: 
57:     raise "yylex returned nil" unless r
58: 
59:     return RubyLexer::EOF != r
60:   end
arg_ambiguous() click to toggle source
    # File lib/ruby_lexer.rb, line 62
    # Reports an ambiguous first argument by passing a fixed message to
    # warning.
62:   def arg_ambiguous
63:     self.warning("Ambiguous first argument. make sure.")
64:   end
comments() click to toggle source
    # File lib/ruby_lexer.rb, line 66
    # Returns every comment collected so far joined into a single string,
    # and empties the collection so the same text is not returned twice.
66:   def comments
67:     c = @comments.join
68:     @comments.clear
69:     c
70:   end
expr_beg_push(val) click to toggle source
    # File lib/ruby_lexer.rb, line 72
    # Pushes false onto both the cond and cmdarg stacks, switches lex_state
    # to :expr_beg, and stores val in yacc_value.
72:   def expr_beg_push val
73:     cond.push false
74:     cmdarg.push false
75:     self.lex_state = :expr_beg
76:     self.yacc_value = val
77:   end
fix_arg_lex_state() click to toggle source
    # File lib/ruby_lexer.rb, line 79
    # Moves lex_state to :expr_arg when it is currently :expr_fname or
    # :expr_dot; in every other state it moves to :expr_beg.
79:   def fix_arg_lex_state
80:     self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
81:                        :expr_arg
82:                      else
83:                        :expr_beg
84:                      end
85:   end
heredoc(here) click to toggle source
     # File lib/ruby_lexer.rb, line 87
     # Lexes the next piece of a here-document.
     #
     # here is a 4-element array; its last three entries are the terminator
     # text (eos), the STR_FUNC_* flag bits (func) and the saved remainder of
     # the line that introduced the heredoc (last_line).
     #
     # Returns :tSTRING_END (after pushing last_line back onto src) when the
     # terminator is found at the beginning of a line. For an expanding
     # heredoc it returns :tSTRING_DVAR or :tSTRING_DBEG when the input starts
     # with "#$"/"#@" or "#{", and :tSTRING_CONTENT as soon as tokadd_string
     # stops on something other than a newline. Otherwise the body text up to
     # the terminator is placed in yacc_value (with "\r" removed), lex_strterm
     # is re-armed, and :tSTRING_CONTENT is returned.
     #
     # Calls rb_compile_error when the input ends before the terminator.
 87:   def heredoc here # 63 lines
 88:     _, eos, func, last_line = here
 89: 
 90:     indent  = (func & STR_FUNC_INDENT) != 0
 91:     expand  = (func & STR_FUNC_EXPAND) != 0
 92:     eos_re  = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
 93:     err_msg = "can't match #{eos_re.inspect} anywhere in "
 94: 
 95:     rb_compile_error err_msg if
 96:       src.eos?
 97: 
 98:     if src.beginning_of_line? && src.scan(eos_re) then
 99:       src.unread_many last_line # TODO: figure out how to remove this
100:       self.yacc_value = eos
101:       return :tSTRING_END
102:     end
103: 
104:     self.string_buffer = []
105: 
106:     if expand then
107:       case
108:       when src.scan(/#[$@]/) then
109:         src.pos -= 1 # FIX omg stupid
110:         self.yacc_value = src.matched
111:         return :tSTRING_DVAR
112:       when src.scan(/#[{]/) then
113:         self.yacc_value = src.matched
114:         return :tSTRING_DBEG
115:       when src.scan(/#/) then
116:         string_buffer << '#'
117:       end
118: 
119:       until src.scan(eos_re) do
120:         c = tokadd_string func, "\n", nil
121: 
122:         rb_compile_error err_msg if
123:           c == RubyLexer::EOF
124: 
125:         if c != "\n" then
126:           self.yacc_value = string_buffer.join.delete("\r")
127:           return :tSTRING_CONTENT
128:         else
129:           string_buffer << src.scan(/\n/)
130:         end
131: 
132:         rb_compile_error err_msg if
133:           src.eos?
134:       end
135: 
136:       # tack on a NL after the heredoc token - FIX NL should not be needed
137:       src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
138:     else
139:       until src.check(eos_re) do
140:         string_buffer << src.scan(/.*(\n|\z)/)
141:         rb_compile_error err_msg if
142:           src.eos?
143:       end
144:     end
145: 
146:     self.lex_strterm = [:heredoc, eos, func, last_line]
147:     self.yacc_value = string_buffer.join.delete("\r")
148: 
149:     return :tSTRING_CONTENT
150:   end
heredoc_identifier() click to toggle source
     # File lib/ruby_lexer.rb, line 152
# Parses the identifier that introduces a here-document and arms
# lex_strterm so that the following yylex calls lex the heredoc body.
#
# Three identifier forms are recognised, in this order:
#   * a quoted identifier ('EOS', "EOS" or `EOS`), optionally preceded by "-"
#   * an opening quote with no matching close, which is a compile error
#   * a bare word, optionally preceded by "-", treated as double-quoted
# A leading "-" adds STR_FUNC_INDENT to the string flags.
#
# The rest of the introducing line is cut out of the source string and kept
# in the strterm, leaving a single "\n" in its place which is then skipped.
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for the other
# forms, or nil when no identifier is present at the scan position.
# Raises (through rb_compile_error) for an unterminated quoted identifier.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then # \2 = the same quote that opened
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
int_with_base(base) click to toggle source
     # File lib/ruby_lexer.rb, line 213
     # Converts the text most recently matched by src to an Integer in the
     # given base, stores it in yacc_value and returns :tINTEGER.
     # Calls rb_compile_error when the matched text contains "__".
213:   def int_with_base base
214:     rb_compile_error "Invalid numeric format" if src.matched =~ /__/
215:     self.yacc_value = src.matched.to_i(base)
216:     return :tINTEGER
217:   end
lex_state=(o) click to toggle source
     # File lib/ruby_lexer.rb, line 219
     # Writer for lex_state. Accepts only a Symbol; anything else raises a
     # RuntimeError.
219:   def lex_state= o
220:     raise "wtf?" unless Symbol === o
221:     @lex_state = o
222:   end
lineno() click to toggle source
     # File lib/ruby_lexer.rb, line 225
     # Returns the cached line number, filling the cache from src.lineno when
     # it is unset (other methods clear it by assigning nil to lineno).
225:   def lineno
226:     @lineno ||= src.lineno
227:   end
parse_number() click to toggle source
 Parse a number from the input stream.

Takes no arguments; the number is scanned from src. @return A token type symbol (:tINTEGER or :tFLOAT) which represents the token.

     # File lib/ruby_lexer.rb, line 235
     # Sets lex_state to :expr_end and then tries the numeric formats in a
     # fixed order; the first pattern that matches wins, so the order of the
     # when clauses matters: bare 0x/0b/0d prefix (error), hex, binary,
     # explicit decimal, octal containing 8 or 9 (error), octal, trailing "_"
     # before "e" or "." (error), float, zero, plain decimal.
     #
     # Integers are handed to int_with_base, which sets yacc_value and
     # returns :tINTEGER. Floats set yacc_value via to_f and yield :tFLOAT.
     # Anything else is reported through rb_compile_error.
235:   def parse_number
236:     self.lex_state = :expr_end
237: 
238:     case
239:     when src.scan(/[+-]?0[xbd]\b/) then
240:       rb_compile_error "Invalid numeric format"
241:     when src.scan(/[+-]?0x[a-f0-9_]+/) then
242:       int_with_base(16)
243:     when src.scan(/[+-]?0b[01_]+/) then
244:       int_with_base(2)
245:     when src.scan(/[+-]?0d[0-9_]+/) then
246:       int_with_base(10)
247:     when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
248:       rb_compile_error "Illegal octal digit."
249:     when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
250:       int_with_base(8)
251:     when src.scan(/[+-]?[\d_]+_(e|\.)/) then
252:       rb_compile_error "Trailing '_' in number."
253:     when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/) then
254:       number = src.matched
255:       if number =~ /__/ then
256:         rb_compile_error "Invalid numeric format"
257:       end
258:       self.yacc_value = number.to_f
259:       :tFLOAT
260:     when src.scan(/[+-]?0\b/) then
261:       int_with_base(10)
262:     when src.scan(/[+-]?[\d_]+\b/) then
263:       int_with_base(10)
264:     else
265:       rb_compile_error "Bad number format"
266:     end
267:   end
parse_quote() click to toggle source
     # File lib/ruby_lexer.rb, line 269
# Parses the head of a %-literal (the text right after "%") and arms
# lex_strterm for the literal's body.
#
# Long-hand form: one letter or digit names the literal type, followed by the
# opening delimiter (e.g. Q{ or w[ ). The match is case-insensitive so that
# the upper-case types Q and W reach their branches below; two alphanumerics
# in a row are rejected. Short-hand form: the delimiter follows "%" directly
# and the literal is treated as type Q.
#
# For bracket-style delimiters the closing character is the matching
# bracket. For any other delimiter the same character closes the literal and
# the stored opening delimiter becomes the "\0" sentinel.
#
# Returns the *_BEG token type for the literal and leaves the "%..." prefix
# in yacc_value. Raises (through rb_compile_error) when the input ends right
# after the delimiter or the type letter is not one of Q q W w x r s.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  self.yacc_value = "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
parse_string(quote) click to toggle source
     # File lib/ruby_lexer.rb, line 318
     # Lexes the next piece of a quoted literal.
     #
     # quote is the strterm array; its last three entries are the STR_FUNC_*
     # flag bits, the terminator and the opening delimiter.
     #
     # When nest is 0 and the terminator is next in the input, returns
     # :tSPACE for word lists (also clearing quote[1]), :tREGEXP_END for
     # regexps (with the options from regx_options in yacc_value), or
     # :tSTRING_END. Returns :tSPACE when whitespace was skipped inside a word
     # list. In an expanding literal "#" before "$"/"@" yields :tSTRING_DVAR
     # and "#{" yields :tSTRING_DBEG. Otherwise the text gathered by
     # tokadd_string is placed in yacc_value and :tSTRING_CONTENT is returned.
     #
     # Calls rb_compile_error when tokadd_string reports end of file.
318:   def parse_string(quote) # 65 lines
319:     _, string_type, term, open = quote
320: 
321:     space = false # FIX: remove these
322:     func = string_type
323:     paren = open
324:     term_re = Regexp.escape term
325: 
326:     awords = (func & STR_FUNC_AWORDS) != 0
327:     regexp = (func & STR_FUNC_REGEXP) != 0
328:     expand = (func & STR_FUNC_EXPAND) != 0
329: 
330:     unless func then # FIX: impossible, prolly needs == 0
331:       self.lineno = nil
332:       return :tSTRING_END
333:     end
334: 
335:     space = true if awords and src.scan(/\s+/)
336: 
337:     if self.nest == 0 && src.scan(/#{term_re}/) then
338:       if awords then
339:         quote[1] = nil
340:         return :tSPACE
341:       elsif regexp then
342:         self.yacc_value = self.regx_options
343:         self.lineno = nil
344:         return :tREGEXP_END
345:       else
346:         self.yacc_value = term
347:         self.lineno = nil
348:         return :tSTRING_END
349:       end
350:     end
351: 
352:     if space then
353:       return :tSPACE
354:     end
355: 
356:     self.string_buffer = []
357: 
358:     if expand
359:       case
360:       when src.scan(/#(?=[$@])/) then
361:         return :tSTRING_DVAR
362:       when src.scan(/#[{]/) then
363:         return :tSTRING_DBEG
364:       when src.scan(/#/) then
365:         string_buffer << '#'
366:       end
367:     end
368: 
369:     if tokadd_string(func, term, paren) == RubyLexer::EOF then
370:       rb_compile_error "unterminated string meets end of file"
371:     end
372: 
373:     self.yacc_value = string_buffer.join
374: 
375: 
376:     return :tSTRING_CONTENT
377:   end
process_token(command_state) click to toggle source
      # File lib/ruby_lexer.rb, line 1232
      # Classifies the identifier-like text held in token and returns its
      # token type, updating lex_state and yacc_value along the way.
      #
      # A trailing "!" or "?" (not followed by "=") is first appended to a
      # token that starts with a word character. Then:
      #   * "$..."  => :tGVAR, "@@..." => :tCVAR, "@..." => :tIVAR, each with
      #     lex_state set to :expr_end
      #   * a token ending in "!" or "?" => :tFID
      #   * in :expr_fname state a following "=" may be absorbed into the
      #     token (setter name), giving :tIDENTIFIER
      #   * otherwise :tCONSTANT when it starts with an upper-case letter,
      #     else :tIDENTIFIER
      # Unless lex_state is :expr_dot, the token is looked up with
      # Keyword.keyword; a reserved word returns early with one of the
      # keyword's ids ("do" is disambiguated using cond, cmdarg and the
      # previous state).
      #
      # command_state selects :expr_cmdarg over :expr_arg for the following
      # state. Finally, a token that parser.env reports as :lvar forces
      # lex_state to :expr_end unless the previous state was :expr_dot.
1232:   def process_token(command_state)
1233: 
1234:     token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
1235: 
1236:     result = nil
1237:     last_state = lex_state
1238: 
1239: 
1240:     case token
1241:     when /^\$/ then
1242:       self.lex_state, result = :expr_end, :tGVAR
1243:     when /^@@/ then
1244:       self.lex_state, result = :expr_end, :tCVAR
1245:     when /^@/ then
1246:       self.lex_state, result = :expr_end, :tIVAR
1247:     else
1248:       if token =~ /[!?]$/ then
1249:         result = :tFID
1250:       else
1251:         if lex_state == :expr_fname then
1252:           # ident=, not =~ => == or followed by =>
1253:           # TODO test lexing of a=>b vs a==>b
1254:           if src.scan(/=(?:(?![~>=])|(?==>))/) then
1255:             result = :tIDENTIFIER
1256:             token << src.matched
1257:           end
1258:         end
1259: 
1260:         result ||= if token =~ /^[A-Z]/ then
1261:                      :tCONSTANT
1262:                    else
1263:                      :tIDENTIFIER
1264:                    end
1265:       end
1266: 
1267:       unless lex_state == :expr_dot then
1268:         # See if it is a reserved word.
1269:         keyword = Keyword.keyword token
1270: 
1271:         if keyword then
1272:           state           = lex_state
1273:           self.lex_state  = keyword.state
1274:           self.yacc_value = token
1275: 
1276:           if keyword.id0 == :kDO then
1277:             self.command_start = true
1278:             return :kDO_COND  if cond.is_in_state
1279:             return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1280:             return :kDO_BLOCK if state == :expr_endarg
1281:             return :kDO
1282:           end
1283: 
1284:           return keyword.id0 if state == :expr_beg
1285: 
1286:           self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1287: 
1288:           return keyword.id1
1289:         end
1290:       end
1291: 
1292:       if (lex_state == :expr_beg || lex_state == :expr_mid ||
1293:           lex_state == :expr_dot || lex_state == :expr_arg ||
1294:           lex_state == :expr_cmdarg) then
1295:         if command_state then
1296:           self.lex_state = :expr_cmdarg
1297:         else
1298:           self.lex_state = :expr_arg
1299:         end
1300:       else
1301:         self.lex_state = :expr_end
1302:       end
1303:     end
1304: 
1305:     self.yacc_value = token
1306: 
1307: 
1308:     self.lex_state = :expr_end if
1309:       last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
1310: 
1311:     return result
1312:   end
rb_compile_error(msg) click to toggle source
     # File lib/ruby_lexer.rb, line 379
     # Raises SyntaxError with msg extended by the current line number and
     # the inspected remainder of the source line at the scan position.
379:   def rb_compile_error msg
380:     msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
381:     raise SyntaxError, msg
382:   end
read_escape() click to toggle source
     # File lib/ruby_lexer.rb, line 384
# Reads one escape sequence from src (the leading backslash has already been
# consumed by the caller) and returns the character(s) it stands for.
#
# Handles the single-letter escapes, \s for space, up to three octal digits,
# \x with one or two hex digits, and the meta (M-) and control (C- or c)
# prefixes, which may wrap a further escape. Any other character is returned
# as itself.
#
# The branch order matters: the single-letter escapes are tried before the
# generic M/C/x/digit error branch.
#
# Raises (through rb_compile_error) on a malformed M/C/c/x/digit escape or
# when the input ends right after the backslash.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr            # set the meta (high) bit
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr            # mask down to a control character
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
regx_options() click to toggle source
     # File lib/ruby_lexer.rb, line 437
     # Scans a run of lower-case letters following a regexp's closing
     # delimiter and splits them into recognised option letters
     # (i x m o n e s u) and everything else.
     # Returns the recognised letters joined into a string ("" when there
     # are none). Calls rb_compile_error when any unrecognised letter is
     # present.
437:   def regx_options # 15 lines
438:     good, bad = [], []
439: 
440:     if src.scan(/[a-z]+/) then
441:       good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
442:     end
443: 
444:     unless bad.empty? then
445:       rb_compile_error("unknown regexp option%s - %s" %
446:                        [(bad.size > 1 ? "s" : ""), bad.join.inspect])
447:     end
448: 
449:     return good.join
450:   end
reset() click to toggle source
     # File lib/ruby_lexer.rb, line 452
     # Returns the lexer to its starting state: command_start becomes true,
     # and lex_strterm, token, yacc_value, the source scanner and lex_state
     # are all cleared to nil. The instance variables are assigned directly,
     # bypassing the src= and lex_state= writers, which reject nil.
452:   def reset
453:     self.command_start = true
454:     self.lex_strterm   = nil
455:     self.token         = nil
456:     self.yacc_value    = nil
457: 
458:     @src       = nil
459:     @lex_state = nil
460:   end
src=(src) click to toggle source
     # File lib/ruby_lexer.rb, line 462
     # Installs new source text. The argument must be a String (anything
     # else raises a RuntimeError); it is wrapped in an RPStringScanner.
462:   def src= src
463:     raise "bad src: #{src.inspect}" unless String === src
464:     @src = RPStringScanner.new(src)
465:   end
tokadd_escape(term) click to toggle source
     # File lib/ruby_lexer.rb, line 467
     # Copies one backslash escape from src into string_buffer verbatim,
     # backslash included, without translating it.
     #
     # A backslash-newline pair is consumed and dropped. A meta/control
     # prefix that is followed by another backslash is copied and the method
     # recurses for the nested escape. term is only passed along to that
     # recursive call.
     #
     # Calls rb_compile_error for an incomplete M/c/C/x escape or when no
     # escape can be read at the scan position.
467:   def tokadd_escape term # 20 lines
468:     case
469:     when src.scan(/\\\n/) then
470:       # just ignore
471:     when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
472:       self.string_buffer << src.matched
473:     when src.scan(/\\([MC]-|c)(?=\\)/) then
474:       self.string_buffer << src.matched
475:       self.tokadd_escape term
476:     when src.scan(/\\([MC]-|c)(.)/) then
477:       self.string_buffer << src.matched
478:     when src.scan(/\\[McCx]/) then
479:       rb_compile_error "Invalid escape character syntax"
480:     when src.scan(/\\(.)/) then
481:       self.string_buffer << src.matched
482:     else
483:       rb_compile_error "Invalid escape character syntax"
484:     end
485:   end
tokadd_string(func, term, paren) click to toggle source
     # File lib/ruby_lexer.rb, line 487
# Accumulates string content from src into string_buffer until something
# that the caller must handle is reached.
#
# func  - STR_FUNC_* flag bits selecting word-list, escape, expand, regexp
#         and symbol behaviour.
# term  - the terminating delimiter.
# paren - the opening delimiter used for nesting, or nil for none.
#
# Scanning stops, with the stopping text left unread, at the terminator (when
# nest is 0), at whitespace inside a word list, or at "#" followed by "$",
# "@" or "{" in an expanding literal. Opening and closing delimiters
# encountered while nested adjust nest and are kept as content.
#
# Returns RubyLexer::EOF when the input was exhausted; otherwise the last
# piece of text that was processed.
# Raises (through rb_compile_error) when a symbol's content contains NUL.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then

      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      # Grab a run of ordinary characters: everything except the delimiters,
      # "#", NUL and backslash (plus newline and space inside word lists).
      re = if awords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?


  return c
end
unescape(s) click to toggle source
     # File lib/ruby_lexer.rb, line 573
# Translates the text of one escape sequence (without its leading backslash)
# into the character it denotes.
#
# s - the escape body, e.g. "n", "101", "x41", "M-a" or "C-a".
#
# Fixed escapes are looked up in a table first. Octal, hex, meta (M-) and
# control (C- or c) forms are decoded next. Any other text is returned
# unchanged.
#
# Raises (through rb_compile_error) for an M/c/C/x/digit prefix that does not
# form a valid escape.
def unescape s

  r = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr # set the meta (high) bit
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr # mask down to a control character
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
warning(s) click to toggle source
     # File lib/ruby_lexer.rb, line 609
     # Warning hook. Currently a no-op: the message s is discarded.
609:   def warning s
610:     # do nothing for now
611:   end
yylex() click to toggle source

Returns the next token. Also sets yacc_value if needed.

@return The token type (a symbol such as :tNL), or RubyLexer::EOF at end of input.

      # File lib/ruby_lexer.rb, line 618
 618:   def yylex # 826 lines
 619: 
 620:     c = ''
 621:     space_seen = false
 622:     command_state = false
 623:     src = self.src
 624: 
 625:     self.token = nil
 626:     self.yacc_value = nil
 627: 
 628:     return yylex_string if lex_strterm
 629: 
 630:     command_state = self.command_start
 631:     self.command_start = false
 632: 
 633:     last_state = lex_state
 634: 
 635:     loop do # START OF CASE
 636:       if src.scan(/\ |\t|\r|\f|\113//) then # white spaces, 13 = '\v
 637:         space_seen = true
 638:         next
 639:       elsif src.check(/[^a-zA-Z]/) then
 640:         if src.scan(/\n|#/) then
 641:           self.lineno = nil
 642:           c = src.matched
 643:           if c == '#' then
 644:             src.unread c # ok
 645: 
 646:             while src.scan(/\s*#.*(\n+|\z)/) do
 647:               @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
 648:             end
 649: 
 650:             if src.eos? then
 651:               return RubyLexer::EOF
 652:             end
 653:           end
 654: 
 655:           # Replace a string of newlines with a single one
 656:           src.scan(/\n+/)
 657: 
 658:           if [:expr_beg, :expr_fname,
 659:               :expr_dot, :expr_class].include? lex_state then
 660:             next
 661:           end
 662: 
 663:           self.command_start = true
 664:           self.lex_state = :expr_beg
 665:           return :tNL
 666:         elsif src.scan(/[\]\)\}]/) then
 667:           cond.lexpop
 668:           cmdarg.lexpop
 669:           self.lex_state = :expr_end
 670:           self.yacc_value = src.matched
 671:           result = {
 672:             ")" => :tRPAREN,
 673:             "]" => :tRBRACK,
 674:             "}" => :tRCURLY
 675:           }[src.matched]
 676:           return result
 677:         elsif src.check(/\./) then
 678:           if src.scan(/\.\.\./) then
 679:             self.lex_state = :expr_beg
 680:             self.yacc_value = "..."
 681:             return :tDOT3
 682:           elsif src.scan(/\.\./) then
 683:             self.lex_state = :expr_beg
 684:             self.yacc_value = ".."
 685:             return :tDOT2
 686:           elsif src.scan(/\.\d/) then
 687:             rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
 688:           elsif src.scan(/\./) then
 689:             self.lex_state = :expr_dot
 690:             self.yacc_value = "."
 691:             return :tDOT
 692:           end
 693:         elsif src.scan(/\,/) then
 694:           self.lex_state = :expr_beg
 695:           self.yacc_value = ","
 696:           return :tCOMMA
 697:         elsif src.scan(/\(/) then
 698:           result = :tLPAREN2
 699:           self.command_start = true
 700:           if lex_state == :expr_beg || lex_state == :expr_mid then
 701:             result = :tLPAREN
 702:           elsif space_seen then
 703:             if lex_state == :expr_cmdarg then
 704:               result = :tLPAREN_ARG
 705:             elsif lex_state == :expr_arg then
 706:               warning("don't put space before argument parentheses")
 707:               result = :tLPAREN2
 708:             end
 709:           end
 710: 
 711:           self.expr_beg_push "("
 712: 
 713:           return result
 714:         elsif src.check(/\=/) then
 715:           if src.scan(/\=\=\=/) then
 716:             self.fix_arg_lex_state
 717:             self.yacc_value = "==="
 718:             return :tEQQ
 719:           elsif src.scan(/\=\=/) then
 720:             self.fix_arg_lex_state
 721:             self.yacc_value = "=="
 722:             return :tEQ
 723:           elsif src.scan(/\=~/) then
 724:             self.fix_arg_lex_state
 725:             self.yacc_value = "=~"
 726:             return :tMATCH
 727:           elsif src.scan(/\=>/) then
 728:             self.fix_arg_lex_state
 729:             self.yacc_value = "=>"
 730:             return :tASSOC
 731:           elsif src.scan(/\=/) then
 732:             if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
 733:               @comments << '=' << src.matched
 734: 
 735:               unless src.scan(/.*?\n=end\s*(\n|\z)/) then
 736:                 @comments.clear
 737:                 rb_compile_error("embedded document meets end of file")
 738:               end
 739: 
 740:               @comments << src.matched
 741: 
 742:               next
 743:             else
 744:               self.fix_arg_lex_state
 745:               self.yacc_value = '='
 746:               return :tEQL
 747:             end
 748:           end
 749:         elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
 750:           self.yacc_value = src.matched[1..2].gsub(ESC_RE) { unescape $1 }
 751:           self.lex_state = :expr_end
 752:           return :tSTRING
 753:         elsif src.scan(/\"/) then # FALLBACK
 754:           self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this
 755:           self.yacc_value = "\""
 756:           return :tSTRING_BEG
 757:         elsif src.scan(/\@\@?\w*/) then
 758:           self.token = src.matched
 759: 
 760:           rb_compile_error "`#{token}` is not allowed as a variable name" if
 761:             token =~ /\@\d/
 762: 
 763:           return process_token(command_state)
 764:         elsif src.scan(/\:\:/) then
 765:           if (lex_state == :expr_beg ||
 766:               lex_state == :expr_mid ||
 767:               lex_state == :expr_class ||
 768:               (lex_state.is_argument && space_seen)) then
 769:             self.lex_state = :expr_beg
 770:             self.yacc_value = "::"
 771:             return :tCOLON3
 772:           end
 773: 
 774:           self.lex_state = :expr_dot
 775:           self.yacc_value = "::"
 776:           return :tCOLON2
 777:         elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
 778:           self.yacc_value = src[1]
 779:           self.lex_state = :expr_end
 780:           return :tSYMBOL
 781:         elsif src.scan(/\:/) then
 782:           # ?: / then / when
 783:           if (lex_state == :expr_end || lex_state == :expr_endarg||
 784:               src.check(/\s/)) then
 785:             self.lex_state = :expr_beg
 786:             self.yacc_value = ":"
 787:             return :tCOLON
 788:           end
 789: 
 790:           case
 791:           when src.scan(/\'/) then
 792:             self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""]
 793:           when src.scan(/\"/) then
 794:             self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""]
 795:           end
 796: 
 797:           self.lex_state = :expr_fname
 798:           self.yacc_value = ":"
 799:           return :tSYMBEG
 800:         elsif src.check(/[0-9]/) then
 801:           return parse_number
 802:         elsif src.scan(/\[/) then
 803:           result = src.matched
 804: 
 805:           if lex_state == :expr_fname || lex_state == :expr_dot then
 806:             self.lex_state = :expr_arg
 807:             case
 808:             when src.scan(/\]\=/) then
 809:               self.yacc_value = "[]="
 810:               return :tASET
 811:             when src.scan(/\]/) then
 812:               self.yacc_value = "[]"
 813:               return :tAREF
 814:             else
 815:               rb_compile_error "unexpected '['"
 816:             end
 817:           elsif lex_state == :expr_beg || lex_state == :expr_mid then
 818:             result = :tLBRACK
 819:           elsif lex_state.is_argument && space_seen then
 820:             result = :tLBRACK
 821:           end
 822: 
 823:           self.expr_beg_push "["
 824: 
 825:           return result
 826:         elsif src.scan(/\'(\\.|[^\'])*\'/) then
 827:           self.yacc_value = src.matched[1..2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
 828:           self.lex_state = :expr_end
 829:           return :tSTRING
 830:         elsif src.check(/\|/) then
 831:           if src.scan(/\|\|\=/) then
 832:             self.lex_state = :expr_beg
 833:             self.yacc_value = "||"
 834:             return :tOP_ASGN
 835:           elsif src.scan(/\|\|/) then
 836:             self.lex_state = :expr_beg
 837:             self.yacc_value = "||"
 838:             return :tOROP
 839:           elsif src.scan(/\|\=/) then
 840:             self.lex_state = :expr_beg
 841:             self.yacc_value = "|"
 842:             return :tOP_ASGN
 843:           elsif src.scan(/\|/) then
 844:             self.fix_arg_lex_state
 845:             self.yacc_value = "|"
 846:             return :tPIPE
 847:           end
 848:         elsif src.scan(/\{/) then
 849:           result = if lex_state.is_argument || lex_state == :expr_end then
 850:                      :tLCURLY      #  block (primary)
 851:                    elsif lex_state == :expr_endarg then
 852:                      :tLBRACE_ARG  #  block (expr)
 853:                    else
 854:                      :tLBRACE      #  hash
 855:                    end
 856: 
 857:           self.expr_beg_push "{"
 858: 
 859:           return result
 860:         elsif src.scan(/[+-]/) then
 861:           sign = src.matched
 862:           utype, type = if sign == "+" then
 863:                           [:tUPLUS, :tPLUS]
 864:                         else
 865:                           [:tUMINUS, :tMINUS]
 866:                         end
 867: 
 868:           if lex_state == :expr_fname || lex_state == :expr_dot then
 869:             self.lex_state = :expr_arg
 870:             if src.scan(/@/) then
 871:               self.yacc_value = "#{sign}@"
 872:               return utype
 873:             else
 874:               self.yacc_value = sign
 875:               return type
 876:             end
 877:           end
 878: 
 879:           if src.scan(/\=/) then
 880:             self.lex_state = :expr_beg
 881:             self.yacc_value = sign
 882:             return :tOP_ASGN
 883:           end
 884: 
 885:           if (lex_state == :expr_beg || lex_state == :expr_mid ||
 886:               (lex_state.is_argument && space_seen && !src.check(/\s/))) then
 887:             if lex_state.is_argument then
 888:               arg_ambiguous
 889:             end
 890: 
 891:             self.lex_state = :expr_beg
 892:             self.yacc_value = sign
 893: 
 894:             if src.check(/\d/) then
 895:               if utype == :tUPLUS then
 896:                 return self.parse_number
 897:               else
 898:                 return :tUMINUS_NUM
 899:               end
 900:             end
 901: 
 902:             return utype
 903:           end
 904: 
 905:           self.lex_state = :expr_beg
 906:           self.yacc_value = sign
 907:           return type
 908:         elsif src.check(/\*/) then
 909:           if src.scan(/\*\*=/) then
 910:             self.lex_state = :expr_beg
 911:             self.yacc_value = "**"
 912:             return :tOP_ASGN
 913:           elsif src.scan(/\*\*/) then
 914:             self.yacc_value = "**"
 915:             self.fix_arg_lex_state
 916:             return :tPOW
 917:           elsif src.scan(/\*\=/) then
 918:             self.lex_state = :expr_beg
 919:             self.yacc_value = "*"
 920:             return :tOP_ASGN
 921:           elsif src.scan(/\*/) then
 922:             result = if lex_state.is_argument && space_seen && src.check(/\S/) then
 923:                        warning("`*' interpreted as argument prefix")
 924:                        :tSTAR
 925:                      elsif lex_state == :expr_beg || lex_state == :expr_mid then
 926:                        :tSTAR
 927:                      else
 928:                        :tSTAR2
 929:                      end
 930:             self.yacc_value = "*"
 931:             self.fix_arg_lex_state
 932: 
 933:             return result
 934:           end
 935:         elsif src.check(/\!/) then
 936:           if src.scan(/\!\=/) then
 937:             self.lex_state = :expr_beg
 938:             self.yacc_value = "!="
 939:             return :tNEQ
 940:           elsif src.scan(/\!~/) then
 941:             self.lex_state = :expr_beg
 942:             self.yacc_value = "!~"
 943:             return :tNMATCH
 944:           elsif src.scan(/\!/) then
 945:             self.lex_state = :expr_beg
 946:             self.yacc_value = "!"
 947:             return :tBANG
 948:           end
 949:         elsif src.check(/\</) then
 950:           if src.scan(/\<\=\>/) then
 951:             self.fix_arg_lex_state
 952:             self.yacc_value = "<=>"
 953:             return :tCMP
 954:           elsif src.scan(/\<\=/) then
 955:             self.fix_arg_lex_state
 956:             self.yacc_value = "<="
 957:             return :tLEQ
 958:           elsif src.scan(/\<\<\=/) then
 959:             self.fix_arg_lex_state
 960:             self.lex_state = :expr_beg
 961:             self.yacc_value = "\<\<"
 962:             return :tOP_ASGN
 963:           elsif src.scan(/\<\</) then
 964:             if (! [:expr_end,    :expr_dot,
 965:                    :expr_endarg, :expr_class].include?(lex_state) &&
 966:                 (!lex_state.is_argument || space_seen)) then
 967:               tok = self.heredoc_identifier
 968:               if tok then
 969:                 return tok
 970:               end
 971:             end
 972: 
 973:             self.fix_arg_lex_state
 974:             self.yacc_value = "\<\<"
 975:             return :tLSHFT
 976:           elsif src.scan(/\</) then
 977:             self.fix_arg_lex_state
 978:             self.yacc_value = "<"
 979:             return :tLT
 980:           end
 981:         elsif src.check(/\>/) then
 982:           if src.scan(/\>\=/) then
 983:             self.fix_arg_lex_state
 984:             self.yacc_value = ">="
 985:             return :tGEQ
 986:           elsif src.scan(/\>\>=/) then
 987:             self.fix_arg_lex_state
 988:             self.lex_state = :expr_beg
 989:             self.yacc_value = ">>"
 990:             return :tOP_ASGN
 991:           elsif src.scan(/\>\>/) then
 992:             self.fix_arg_lex_state
 993:             self.yacc_value = ">>"
 994:             return :tRSHFT
 995:           elsif src.scan(/\>/) then
 996:             self.fix_arg_lex_state
 997:             self.yacc_value = ">"
 998:             return :tGT
 999:           end
1000:         elsif src.scan(/\`/) then
1001:           self.yacc_value = "`"
1002:           case lex_state
1003:           when :expr_fname then
1004:             self.lex_state = :expr_end
1005:             return :tBACK_REF2
1006:           when :expr_dot then
1007:             self.lex_state = if command_state then
1008:                                :expr_cmdarg
1009:                              else
1010:                                :expr_arg
1011:                              end
1012:             return :tBACK_REF2
1013:           end
1014:           self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""]
1015:           return :tXSTRING_BEG
1016:         elsif src.scan(/\?/) then
1017:           if lex_state == :expr_end || lex_state == :expr_endarg then
1018:             self.lex_state = :expr_beg
1019:             self.yacc_value = "?"
1020:             return :tEH
1021:           end
1022: 
1023:           if src.eos? then
1024:             rb_compile_error "incomplete character syntax"
1025:           end
1026: 
1027:           if src.check(/\s|\v/) then
1028:             unless lex_state.is_argument then
1029:               c2 = { " " => 's',
1030:                     "\n" => 'n',
1031:                     "\t" => 't',
1032:                     "\v" => 'v',
1033:                     "\r" => 'r',
1034:                     "\f" => 'f' }[src.matched]
1035: 
1036:               if c2 then
1037:                 warning("invalid character syntax; use ?\\" + c2)
1038:               end
1039:             end
1040: 
1041:             # ternary
1042:             self.lex_state = :expr_beg
1043:             self.yacc_value = "?"
1044:             return :tEH
1045:           elsif src.check(/\w(?=\w)/) then # ternary, also
1046:             self.lex_state = :expr_beg
1047:             self.yacc_value = "?"
1048:             return :tEH
1049:           end
1050: 
1051:           c = if src.scan(/\\/) then
1052:                 self.read_escape
1053:               else
1054:                 src.getch
1055:               end
1056:           self.lex_state = :expr_end
1057:           self.yacc_value = c[0].ord & 0xff
1058:           return :tINTEGER
1059:         elsif src.check(/\&/) then
1060:           if src.scan(/\&\&\=/) then
1061:             self.yacc_value = "&&"
1062:             self.lex_state = :expr_beg
1063:             return :tOP_ASGN
1064:           elsif src.scan(/\&\&/) then
1065:             self.lex_state = :expr_beg
1066:             self.yacc_value = "&&"
1067:             return :tANDOP
1068:           elsif src.scan(/\&\=/) then
1069:             self.yacc_value = "&"
1070:             self.lex_state = :expr_beg
1071:             return :tOP_ASGN
1072:           elsif src.scan(/&/) then
1073:             result = if lex_state.is_argument && space_seen &&
1074:                          !src.check(/\s/) then
1075:                        warning("`&' interpreted as argument prefix")
1076:                        :tAMPER
1077:                      elsif lex_state == :expr_beg || lex_state == :expr_mid then
1078:                        :tAMPER
1079:                      else
1080:                        :tAMPER2
1081:                      end
1082: 
1083:             self.fix_arg_lex_state
1084:             self.yacc_value = "&"
1085:             return result
1086:           end
1087:         elsif src.scan(/\//) then
1088:           if lex_state == :expr_beg || lex_state == :expr_mid then
1089:             self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
1090:             self.yacc_value = "/"
1091:             return :tREGEXP_BEG
1092:           end
1093: 
1094:           if src.scan(/\=/) then
1095:             self.yacc_value = "/"
1096:             self.lex_state = :expr_beg
1097:             return :tOP_ASGN
1098:           end
1099: 
1100:           if lex_state.is_argument && space_seen then
1101:             unless src.scan(/\s/) then
1102:               arg_ambiguous
1103:               self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
1104:               self.yacc_value = "/"
1105:               return :tREGEXP_BEG
1106:             end
1107:           end
1108: 
1109:           self.fix_arg_lex_state
1110:           self.yacc_value = "/"
1111: 
1112:           return :tDIVIDE
1113:         elsif src.scan(/\^=/) then
1114:           self.lex_state = :expr_beg
1115:           self.yacc_value = "^"
1116:           return :tOP_ASGN
1117:         elsif src.scan(/\^/) then
1118:           self.fix_arg_lex_state
1119:           self.yacc_value = "^"
1120:           return :tCARET
1121:         elsif src.scan(/\;/) then
1122:           self.command_start = true
1123:           self.lex_state = :expr_beg
1124:           self.yacc_value = ";"
1125:           return :tSEMI
1126:         elsif src.scan(/\~/) then
1127:           if lex_state == :expr_fname || lex_state == :expr_dot then
1128:             src.scan(/@/)
1129:           end
1130: 
1131:           self.fix_arg_lex_state
1132:           self.yacc_value = "~"
1133: 
1134:           return :tTILDE
1135:         elsif src.scan(/\\/) then
1136:           if src.scan(/\n/) then
1137:             self.lineno = nil
1138:             space_seen = true
1139:             next
1140:           end
1141:           rb_compile_error "bare backslash only allowed before newline"
1142:         elsif src.scan(/\%/) then
1143:           if lex_state == :expr_beg || lex_state == :expr_mid then
1144:             return parse_quote
1145:           end
1146: 
1147:           if src.scan(/\=/) then
1148:             self.lex_state = :expr_beg
1149:             self.yacc_value = "%"
1150:             return :tOP_ASGN
1151:           end
1152: 
1153:           if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1154:             return parse_quote
1155:           end
1156: 
1157:           self.fix_arg_lex_state
1158:           self.yacc_value = "%"
1159: 
1160:           return :tPERCENT
1161:         elsif src.check(/\$/) then
1162:           if src.scan(/(\$_)(\w+)/) then
1163:             self.lex_state = :expr_end
1164:             self.token = src.matched
1165:             return process_token(command_state)
1166:           elsif src.scan(/\$_/) then
1167:             self.lex_state = :expr_end
1168:             self.token = src.matched
1169:             self.yacc_value = src.matched
1170:             return :tGVAR
1171:           elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1172:             self.lex_state = :expr_end
1173:             self.yacc_value = src.matched
1174:             return :tGVAR
1175:           elsif src.scan(/\$([\&\`\'\+])/) then
1176:             self.lex_state = :expr_end
1177:             # Explicit reference to these vars as symbols...
1178:             if last_state == :expr_fname then
1179:               self.yacc_value = src.matched
1180:               return :tGVAR
1181:             else
1182:               self.yacc_value = src[1].to_sym
1183:               return :tBACK_REF
1184:             end
1185:           elsif src.scan(/\$([1-9]\d*)/) then
1186:             self.lex_state = :expr_end
1187:             if last_state == :expr_fname then
1188:               self.yacc_value = src.matched
1189:               return :tGVAR
1190:             else
1191:               self.yacc_value = src[1].to_i
1192:               return :tNTH_REF
1193:             end
1194:           elsif src.scan(/\$0/) then
1195:             self.lex_state = :expr_end
1196:             self.token = src.matched
1197:             return process_token(command_state)
1198:           elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1199:             self.lex_state = :expr_end
1200:             self.yacc_value = "$"
1201:             return "$"
1202:           elsif src.scan(/\$\w+/)
1203:             self.lex_state = :expr_end
1204:             self.token = src.matched
1205:             return process_token(command_state)
1206:           end
1207:         elsif src.check(/\_/) then
1208:           if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1209:             self.lineno = nil
1210:             return RubyLexer::EOF
1211:           elsif src.scan(/\_\w*/) then
1212:             self.token = src.matched
1213:             return process_token(command_state)
1214:           end
1215:         end
1216:       end # END OF CASE
1217: 
1218:       if src.scan(/\0004|\0032|\0000/) || src.eos? then # ^D, ^Z, EOF
1219:         return RubyLexer::EOF
1220:       else # alpha check
1221:         if src.scan(/\W/) then
1222:           rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1223:         end
1224:       end
1225: 
1226:       self.token = src.matched if self.src.scan(/\w+/)
1227: 
1228:       return process_token(command_state)
1229:     end
1230:   end
yylex_string() click to toggle source
      # File lib/ruby_lexer.rb, line 1314
1314:   def yylex_string # 23 lines
1315:     token = if lex_strterm[0] == :heredoc then
1316:               self.heredoc lex_strterm
1317:             else
1318:               self.parse_string lex_strterm
1319:             end
1320: 
1321:     if token == :tSTRING_END || token == :tREGEXP_END then
1322:       self.lineno      = nil
1323:       self.lex_strterm = nil
1324:       self.lex_state   = :expr_end
1325:     end
1326: 
1327:     return token
1328:   end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.