# File lib/puppet/parser/lexer.rb, line 382
def initialize
  initvars
end

# File lib/puppet/parser/lexer.rb, line 314
def clear
  initvars
end
Returns the content of the currently accumulated comment cache, popping it off the comment stack.
# File lib/puppet/parser/lexer.rb, line 575
def commentpop
  @commentstack.pop[0]
end

# File lib/puppet/parser/lexer.rb, line 589
def commentpush
  @commentstack.push(['', @line])
end
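Taken together, commentpush and commentpop bracket comment accumulation around parenthesised regions. A minimal sketch of the discipline, assuming a freshly created lexer (normally the scan loop below drives these calls, not user code):

lexer = Puppet::Parser::Lexer.new
lexer.commentpush          # scan does this on :LBRACE or :LPAREN
# ... munge_token appends accumulated COMMENT text to the top cache ...
text = lexer.commentpop    # scan does this on :RPAREN; returns the text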
# File lib/puppet/parser/lexer.rb, line 318
def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end

# File lib/puppet/parser/lexer.rb, line 337
def file=(file)
  @file = file
  @line = 1
  contents = File.exists?(file) ? File.read(file) : ""
  @scanner = StringScanner.new(contents)
end
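A minimal usage sketch (the manifest path is hypothetical). Note that a nonexistent file silently yields an empty scanner rather than raising:

lexer = Puppet::Parser::Lexer.new
lexer.file = "/etc/puppet/manifests/site.pp"  # hypothetical path
tokens = lexer.fullscan  # just [[false, false]] if the file was missing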
Find the next token that matches a regex. We look for these first.
# File lib/puppet/parser/lexer.rb, line 358
def find_regex_token
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
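The longest-match rule matters when token patterns share prefixes. Here is a self-contained sketch of the same idea using the stdlib StringScanner (token names borrowed from this lexer for flavor; match? returns the match length without consuming input):

require 'strscan'

patterns = { :LLCOLLECT => /<<\|/, :LSHIFT => /<</, :LESSTHAN => /</ }
scanner  = StringScanner.new("<<| Foo |>>")

best_token, best_length = nil, 0
patterns.each do |name, regex|
  if (length = scanner.match?(regex)) && length > best_length
    best_token, best_length = name, length
  end
end
best_token # => :LLCOLLECT rather than :LSHIFT or :LESSTHAN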
# File lib/puppet/parser/lexer.rb, line 346
def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars.  This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have.  Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  #
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
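The same strategy in isolation, assuming a plain Hash in place of the TOKENS collection: peek at up to three characters and try the longest prefix first.

require 'strscan'

STRINGS = { '<<|' => :LLCOLLECT, '<<' => :LSHIFT, '<' => :LESSTHAN }
scanner = StringScanner.new('<< 4')

s = scanner.peek(3)   # "<< "
token = STRINGS[s[0, 3]] || STRINGS[s[0, 2]] || STRINGS[s[0, 1]]
token # => :LSHIFT, found in at most three hash lookups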
Find the next token, returning the token and the string it matched.
# File lib/puppet/parser/lexer.rb, line 378
def find_token
  shift_token || find_regex_token || find_string_token
end
Scan the whole file. This is basically just used for testing.
# File lib/puppet/parser/lexer.rb, line 326
def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token, str])
  }
  array
end
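A usage sketch for a test, assuming a trivial manifest string. Per scan below, the last element is always the [false, false] end-of-input sentinel:

lexer = Puppet::Parser::Lexer.new
lexer.string = "notice('hi')"
lexer.fullscan.each do |name, token|
  # each entry is [token_name, { :value => ..., :line => ... }],
  # except the trailing [false, false] sentinel
  p [name, token]
end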
# File lib/puppet/parser/lexer.rb, line 579
def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end

# File lib/puppet/parser/lexer.rb, line 386
def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  #   no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
  }
end

# File lib/puppet/parser/lexer.rb, line 23
def lex_error msg
  raise Puppet::LexError.new(msg)
end
Make any necessary changes to the token and/or value.
# File lib/puppet/parser/lexer.rb, line 408
def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end
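A sketch of the two possible return shapes, assuming a lexer that has just been fed a bare word (the exact token name depends on TOKENS):

lexer = Puppet::Parser::Lexer.new
lexer.string = "foo"
token, value = lexer.find_token
lexer.munge_token(token, value)
# => [token, { :value => "foo", :line => 1 }] for a kept token,
#    or nil for skipped input such as whitespace or comments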
Collect the current namespace.
# File lib/puppet/parser/lexer.rb, line 438
def namespace
  @namestack.join("::")
end
This is the heart of the lexer.
# File lib/puppet/parser/lexer.rb, line 445
def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    yielded = false
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    lexing_context[:after] = final_token.name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST

    value = token_value[:value]

    if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected.pop
    end

    if final_token.name == :LBRACE or final_token.name == :LPAREN
      commentpush
    end
    if final_token.name == :RPAREN
      commentpop
    end

    yield [final_token.name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
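A minimal sketch of driving scan directly, assuming a small manifest string; each yield carries the token name plus the { :value, :line } hash built by munge_token:

lexer = Puppet::Parser::Lexer.new
lexer.string = "class foo { }"
lexer.scan do |name, token|
  break if name == false   # the end-of-input sentinel
  puts "line #{token[:line]}: #{name} #{token[:value].inspect}"
end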
Skip any skipchars in our remaining string.
# File lib/puppet/parser/lexer.rb, line 516
def skip
  @scanner.skip(@skip)
end
We've encountered the start of a string; slurp in the rest of the string and return it.
# File lib/puppet/parser/lexer.rb, line 526
def slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal newlines add to the line count.
  str.gsub!(/\\(.)/) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'r';  "\r"
      when 'n';  "\n"
      when 't';  "\t"
      when 's';  " "
      when "\n"; ''
      else       ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2], str[-1,1] ]
end
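The escape translation in isolation: a self-contained sketch of the case table above, applied to an already-slurped string (unrecognised escapes are simplified here; the real method keeps the backslash and warns):

str = 'line one\\nline two\\ttabbed'   # contains literal backslash escapes
str.gsub(/\\(.)/) do
  case $1
  when 'r'  then "\r"
  when 'n'  then "\n"
  when 't'  then "\t"
  when 's'  then ' '
  when "\n" then ''   # escaped literal newline: line continuation
  else $1             # simplified; see the warning branch above
  end
end
# => "line one\nline two\ttabbed"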
Just parse a string, not a whole file.
# File lib/puppet/parser/lexer.rb, line 570
def string=(string)
  @scanner = StringScanner.new(string)
end
# File lib/puppet/parser/lexer.rb, line 550
def tokenize_interpolated_string(token_type, preamble = '')
  value, terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]], preamble + value]
  variable_regex = if Puppet[:allow_variables_with_dashes]
    TOKENS[:VARIABLE_WITH_DASH].regex
  else
    TOKENS[:VARIABLE].regex
  end
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(variable_regex)
    warn_if_variable_has_hyphen(var_name)
    token_queue << [TOKENS[:VARIABLE], var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type, token_queue.pop.last + terminator)
  end
end
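Roughly what an interpolated double-quoted string lexes to; this is hedged, since the exact names come from TOKENS and the DQ token-type tables:

lexer = Puppet::Parser::Lexer.new
lexer.string = '"Hello $name!"'
lexer.fullscan
# => approximately:
#    [[:DQPRE,    { :value => "Hello ", ... }],
#     [:VARIABLE, { :value => "name",   ... }],
#     [:DQPOST,   { :value => "!",      ... }],
#     [false, false]]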
# File lib/puppet/parser/lexer.rb, line 593
def warn_if_variable_has_hyphen(var_name)
  if var_name.include?('-')
    Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
  end
end