class Prism::Translation::Parser::Lexer

接受 prism 标记列表,并将其转换为 parser gem 所期望的格式。

常量

LAMBDA_TOKEN_TYPES

它用于确定 `do` 的标记类型是 `kDO` 还是 `kDO_LAMBDA`。

注意:在诸如 `-> (foo = -> (bar) {}) do end` 这样的边缘情况下,仍会返回 `kDO` 而不是 `kDO_LAMBDA`,这是符合预期的行为,参见:github.com/ruby/prism/pull/3046

LPAREN_CONVERSION_TOKEN_TYPES

Prism 中的 `PARENTHESIS_LEFT` 标记在 Parser gem 中会被归类为 `tLPAREN` 或 `tLPAREN2`。此常量列出的标记类型用于判定前者:当前一个标记属于其中之一时,该左括号被归类为 `tLPAREN`。

TYPES

两个词法分析器之间类型的直接转换。

属性

lexed[R]

一个包含 prism 标记及其在词法分析时的关联词法状态的元组数组。

offset_cache[R]

一个将字节偏移量映射到字符偏移量的哈希。

source_buffer[R]

从中分析标记的 Parser::Source::Buffer。

公共类方法

new(source_buffer, lexed, offset_cache) 点击以切换源代码

使用给定的源缓冲区、prism 标记和偏移缓存初始化词法分析器。

# File prism/translation/parser/lexer.rb, line 217
# Build a lexer translator around an already-lexed prism token stream.
#
# source_buffer - the buffer the tokens were lexed from.
# lexed         - array of [prism token, lex state] tuples.
# offset_cache  - lookup table from byte offsets to character offsets.
def initialize(source_buffer, lexed, offset_cache)
  @lexed, @source_buffer = lexed, source_buffer
  @offset_cache = offset_cache
end

公共实例方法

to_a() 点击以切换源代码

将 prism 标记转换为 parser gem 所期望的格式。

# File prism/translation/parser/lexer.rb, line 227
# Convert the prism tokens held in `lexed` into the token format expected by
# the parser gem.
#
# Returns an Array whose entries have the shape [type, [value, location]],
# where `location` is a Range built over `source_buffer` from offsets looked
# up in `offset_cache`.
#
# The loop walks `lexed` with a manual index because several branches peek at
# (and consume) the following tokens, and some branches emit more than one
# output token for a single prism token.
def to_a
  tokens = []

  index = 0
  length = lexed.length

  # Identifiers of heredocs that have been opened but not yet closed; pushed
  # on HEREDOC_START and popped on the matching HEREDOC_END.
  heredoc_identifier_stack = []

  while index < length
    token, state = lexed[index]
    # Advance first, so that `lexed[index]` below always means "the next token".
    index += 1
    # These prism tokens produce no entry in the output.
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

    # NOTE(review): presumably TYPES is a Hash, in which case an unmapped
    # prism type raises KeyError here — confirm against the constant.
    type = TYPES.fetch(token.type)
    # `value` is the token's own string object, so the bang methods used in
    # the branches below (chomp!, delete_prefix!) modify token.value in place.
    value = token.value
    location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])

    case type
    when :kDO
      # Look backwards through the tokens emitted so far for the nearest type
      # listed in LAMBDA_TOKEN_TYPES; only when that one is :tLAMBDA does this
      # `do` become kDO_LAMBDA.
      # NOTE(review): the block parameter `type` shadows the outer local `type`.
      types = tokens.map(&:first)
      nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }

      if nearest_lambda_token_type == :tLAMBDA
        type = :kDO_LAMBDA
      end
    when :tCHARACTER
      # Drop the leading "?" of a character literal.
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold the tokens that follow EMBDOC_BEGIN, up to and including
        # EMBDOC_END, into a single comment token.
        start_index = index

        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end

        if start_index != index
          # Append the token the loop stopped on and extend the location to
          # its end, then consume it.
          value += next_token.value
          location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
          index += 1
        end
      else
        # Strip the trailing line terminator from the comment text.
        # NOTE(review): the end offset is reduced by exactly one regardless of
        # what chomp! removed; this assumes the token always ends with a
        # single-byte newline — TODO confirm for CRLF and end-of-file comments.
        value.chomp!
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # A leading "+" is emitted as its own tUNARY_NUM token and the integer
        # token's location is moved past it.
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
      end

      value = parse_integer(value)
    when :tLABEL
      # Drop the trailing ":" of the label.
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      # The lex state recorded with the token decides between tLCURLY and tLBRACE.
      type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
    when :tLPAREN2
      # Becomes tLPAREN at the very start of the output, or when the previously
      # emitted token type is listed in LPAREN_CONVERSION_TOKEN_TYPES.
      type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
    when :tNTH_REF
      # The value is the number after the "$" sigil.
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      # Keep only the operator part, without the trailing "=".
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        # Remember the bare heredoc identifier (without "<<", "-"/"~" and
        # quotes) so it can be used as the value of the closing token.
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end
      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        # Opening quote immediately followed by the closing one: emit a single
        # tSTRING with an empty value that spans both tokens.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        # Quote, one content token of at most one line, closing quote: emit a
        # single tSTRING spanning all three tokens. Doubled backslashes in the
        # content are collapsed to single ones.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        # Heredoc opener: the value is normalized to "<<" plus the quote
        # character, defaulting to a double quote; a backtick heredoc is
        # emitted as tXSTRING_BEG.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      # Content that is not exactly one line is emitted as one tSTRING_CONTENT
      # token per line instead of the single token pushed at the bottom of the loop.
      unless (lines = token.value.lines).one?
        start_offset = offset_cache[token.location.start_offset]
        # NOTE(review): the result of `map` is discarded; it is used purely for
        # iteration.
        lines.map do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp
          if match = chomped_line.match(/(?<backslashes>\\+)\z/)
            # The line ends in a run of backslashes: half of them (rounded
            # down) are removed from the text. With an odd count the remaining
            # backslash is removed too and no newline is appended; with an even
            # count the newline is appended back.
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end

          # `adjustment` adds back the characters removed from the text so the
          # running offset still advances over the whole source line.
          # NOTE(review): start_offset was already translated through
          # offset_cache above and is looked up in offset_cache again below,
          # and String#length counts characters rather than bytes — verify
          # that this is correct for sources containing multibyte characters.
          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
          start_offset = end_offset
        end
        # The per-line tokens are already in `tokens`; skip the generic push.
        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        # The value becomes the identifier saved when the heredoc was opened,
        # and the location excludes the trailing line terminator.
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
      elsif token.type == :REGEXP_END
        # Only the first character stays in this token; the rest of the
        # original value is emitted as tREGEXP_OPT after the generic push below.
        value = value[0]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
      end
    when :tSYMBEG
      # Unless the next token is string content, an interpolation start or a
      # string end, merge this token and the next into one tSYMBOL token.
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        # "~@" and "!@" are reported as "~" and "!".
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      # Directly after `def`, a tFID is emitted as a plain tIDENTIFIER.
      if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
        type = :tIDENTIFIER
      end
    when :tXSTRING_BEG
      # When the next token is neither string content nor a string end, this
      # token is emitted as tBACK_REF2 instead.
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      # Everything after the first character of the regexp end token is
      # emitted as a separate tREGEXP_OPT token.
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
    end
  end

  tokens
end

私有实例方法

parse_complex(value) 点击以切换源代码

从字符串表示形式解析复数。

# File prism/translation/parser/lexer.rb, line 409
# Parse an imaginary literal from its source text.
#
# value - the literal as a String, e.g. "2i", "1.5i", "3ri" or "0x10i".
#
# Returns a Complex with a real part of 0. The imaginary part is parsed by
# parse_rational when the text (minus the "i") ends in "r", by parse_integer
# when it starts with a radix prefix, and by Kernel#Complex otherwise.
# Returns 0i when the conversion raises ArgumentError.
#
# The argument is left untouched, so frozen strings are accepted and the
# caller's string (the token's own value) is not altered as a side effect.
def parse_complex(value)
  # Non-destructive chomp: work on a private copy without the "i" suffix.
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    # Kernel#Integer understands the radix prefixes.
    Complex(0, parse_integer(imaginary))
  else
    Complex(0, imaginary)
  end
rescue ArgumentError
  0i
end
parse_float(value) 点击以切换源代码

从字符串表示形式解析浮点数。

# File prism/translation/parser/lexer.rb, line 402
# Parse a float literal from its source text.
#
# value - the literal as accepted by Kernel#Float.
#
# Returns the Float, or 0.0 when Kernel#Float rejects the text with
# ArgumentError.
def parse_float(value)
  begin
    parsed = Float(value)
  rescue ArgumentError
    parsed = 0.0
  end

  parsed
end
parse_integer(value) 点击以切换源代码

从字符串表示形式解析整数。

# File prism/translation/parser/lexer.rb, line 395
# Parse an integer literal from its source text.
#
# value - the literal as accepted by Kernel#Integer (radix prefixes such as
#         "0x" and "0b" are handled by Kernel#Integer itself).
#
# Returns the Integer, or 0 when Kernel#Integer rejects the text with
# ArgumentError.
def parse_integer(value)
  begin
    parsed = Integer(value)
  rescue ArgumentError
    parsed = 0
  end

  parsed
end
parse_rational(value) 点击以切换源代码

从字符串表示形式解析有理数。

# File prism/translation/parser/lexer.rb, line 424
# Parse a rational literal from its source text.
#
# value - the literal as a String, e.g. "3r", "1.5r" or "0x10r".
#
# Returns a Rational. Text starting with a radix prefix is converted through
# parse_integer first; anything else is handed to Kernel#Rational directly.
# Returns 0r when the conversion raises ArgumentError.
#
# The argument is left untouched, so frozen strings are accepted and the
# caller's string (the token's own value) is not altered as a side effect.
def parse_rational(value)
  # Non-destructive chomp: work on a private copy without the "r" suffix.
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    Rational(parse_integer(digits))
  else
    Rational(digits)
  end
rescue ArgumentError
  0r
end