class Puppet::Parser::Lexer

Constants

DQ_continuation_token_types
DQ_initial_token_types
KEYWORDS
TOKENS

Attributes

file[R]
indefine[RW]
indefine?[RW]
last[R]
lexing_context[R]
line[RW]
token_queue[R]

Public Class Methods

new()
# File lib/puppet/parser/lexer.rb, line 382
def initialize
  initvars
end

Public Instance Methods

clear()
# File lib/puppet/parser/lexer.rb, line 314
def clear
  initvars
end
commentpop()

Returns the contents of the currently accumulated comment cache.

# File lib/puppet/parser/lexer.rb, line 575
def commentpop
  @commentstack.pop[0]
end
commentpush()
# File lib/puppet/parser/lexer.rb, line 589
def commentpush
  @commentstack.push(['', @line])
end
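
Taken together, commentpush and commentpop maintain a stack of comment caches: scan pushes a fresh cache when it sees :LBRACE or :LPAREN and pops one on :RPAREN, while munge_token appends accumulating comment text to the top entry. A minimal stand-alone sketch of the mechanics (plain Ruby, not Puppet's code; the values are illustrative):

commentstack = [['', 1]]           # each entry is [accumulated_text, starting_line]

# commentpush: open a fresh cache, e.g. on :LBRACE
commentstack.push(['', 3])

# munge_token appends an accumulating token's text to the top cache
comment = commentstack.pop
comment[0] << "configures the port" << "\n"
commentstack.push(comment)

# commentpop: close the cache, keeping only the accumulated text
commentstack.pop[0]                # => "configures the port\n"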
expected()
# File lib/puppet/parser/lexer.rb, line 318
def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
file=(file)
# File lib/puppet/parser/lexer.rb, line 337
def file=(file)
  @file = file
  @line = 1
  contents = File.exists?(file) ? File.read(file) : ""
  @scanner = StringScanner.new(contents)
end
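
A usage sketch, assuming a Puppet release that ships this lexer (the manifest path is illustrative):

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.file = "/etc/puppet/manifests/site.pp"
# A missing file is not an error: the lexer then scans an empty string.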
find_regex_token()

Find the next token that matches a regex. We look for these first.

# File lib/puppet/parser/lexer.rb, line 358
def find_regex_token
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
find_string_token()
# File lib/puppet/parser/lexer.rb, line 346
def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars.  This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have.  Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  #
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
find_token()

Find the next token, returning the token and the matched string.

# File lib/puppet/parser/lexer.rb, line 378
def find_token
  shift_token || find_regex_token || find_string_token
end
fullscan()

Scan the whole file. Basically just used for testing.

# File lib/puppet/parser/lexer.rb, line 326
def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token,str])
  }
  array
end
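
Combined with string= (documented below), fullscan gives a quick way to inspect the token stream. The output is indicative and assumes a Puppet release matching this source:

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.string = "notice('hi')"
lexer.fullscan
# Roughly:
#   [[:NAME,   {:value => "notice", :line => 1}],
#    [:LPAREN, {:value => "(",      :line => 1}],
#    [:STRING, {:value => "hi",     :line => 1}],
#    [:RPAREN, {:value => ")",      :line => 1}],
#    [false, false]]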
getcomment(line = nil)
# File lib/puppet/parser/lexer.rb, line 579
def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end
initvars()
# File lib/puppet/parser/lexer.rb, line 386
def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  # no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
    }
end
lex_error(msg)
# File lib/puppet/parser/lexer.rb, line 23
def lex_error msg
  raise Puppet::LexError.new(msg)
end
munge_token(token, value)

Make any necessary changes to the token and/or value.

# File lib/puppet/parser/lexer.rb, line 408
def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end
namespace()

Collect the current namespace.

# File lib/puppet/parser/lexer.rb, line 438
def namespace
  @namestack.join("::")
end
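
The name stack grows as class definitions are scanned (see the :CLASS handling in scan below), so after lexing nested classes this returns the fully qualified name. An indicative sketch, with the same assumptions as above:

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.string = "class foo { class bar {"
lexer.fullscan
lexer.namespace   # => "foo::bar" (indicative)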
scan() { |name, token_value| ... }

This is the heart of the lexer.

# File lib/puppet/parser/lexer.rb, line 445
def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    yielded = false
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    lexing_context[:after]         = final_token.name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST

    value = token_value[:value]

    if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected.pop
    end

    if final_token.name == :LBRACE or final_token.name == :LPAREN
      commentpush
    end
    if final_token.name == :RPAREN
      commentpop
    end

    yield [final_token.name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
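
A sketch of driving scan directly with a block, under the same assumptions; the final [false, false] pair is how callers detect end of input:

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.string = "$x = 1"
lexer.scan do |name, token_value|
  break if name == false                # end-of-input marker
  puts "#{name} #{token_value[:value].inspect}"
end
# Indicative output:
#   VARIABLE "x"
#   EQUALS "="
#   NAME "1"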
skip()

Skip any skipchars in our remaining string.

# File lib/puppet/parser/lexer.rb, line 516
def skip
  @scanner.skip(@skip)
end
slurpstring(terminators,escapes=%w{ \\ $ ' " r n t s }+["\n"],ignore_invalid_escapes=false)

We've encountered the start of a string. Slurp in the rest of the string and return it.

# File lib/puppet/parser/lexer.rb, line 526
def slurpstring(terminators,escapes=%w{ \\  $ ' " r n t s }+["\n"],ignore_invalid_escapes=false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal newlines add to the line count.
  str.gsub!(/\\(.)/) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'r'; "\r"
      when 'n'; "\n"
      when 't'; "\t"
      when 's'; " "
      when "\n"; ''
      else      ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2],str[-1,1] ]
end
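
A sketch of calling slurpstring directly, assuming the opening quote has already been consumed (string= is documented below); with the default escape set, a backslash-n sequence is rewritten to a real newline:

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.string = "line one\\nline two'"    # remainder of a quoted string
value, terminator = lexer.slurpstring("'")
value        # => "line one\nline two" (escape rewritten)
terminator   # => "'"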
string=(string)

Just parse a string, not a whole file.

# File lib/puppet/parser/lexer.rb, line 570
def string=(string)
  @scanner = StringScanner.new(string)
end
tokenize_interpolated_string(token_type,preamble='')
# File lib/puppet/parser/lexer.rb, line 550
def tokenize_interpolated_string(token_type,preamble='')
  value,terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]],preamble+value]
  variable_regex = if Puppet[:allow_variables_with_dashes]
                     TOKENS[:VARIABLE_WITH_DASH].regex
                   else
                     TOKENS[:VARIABLE].regex
                   end
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(variable_regex)
    warn_if_variable_has_hyphen(var_name)
    token_queue << [TOKENS[:VARIABLE],var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type,token_queue.pop.last + terminator)
  end
end
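
The net effect is easiest to see in the resulting token sequence: an interpolated double-quoted string is queued as a :DQPRE/:VARIABLE/:DQPOST triple. Indicative, under the same assumptions as the earlier sketches:

require 'puppet'

lexer = Puppet::Parser::Lexer.new
lexer.string = '"a $x b"'
lexer.fullscan
# Roughly:
#   [[:DQPRE,    {:value => "a ", :line => 1}],
#    [:VARIABLE, {:value => "x",  :line => 1}],
#    [:DQPOST,   {:value => " b", :line => 1}],
#    [false, false]]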
warn_if_variable_has_hyphen(var_name)
# File lib/puppet/parser/lexer.rb, line 593
def warn_if_variable_has_hyphen(var_name)
  if var_name.include?('-')
    Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
  end
end