class Prism::Translation::Parser::Lexer
Accepts a list of prism tokens and converts them into the format expected by the parser gem.
Constants
- LAMBDA_TOKEN_TYPES
Used to determine whether the token type for `do` should be `kDO` or `kDO_LAMBDA` (see the sketch after this list).
Note that in edge cases like `-> (foo = -> (bar) {}) do end`, `kDO` is still returned instead of `kDO_LAMBDA`; this is expected: github.com/ruby/prism/pull/3046
- LPAREN_CONVERSION_TOKEN_TYPES
The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the parser gem. When the previous token's type is one of the following, it is classified as `tLPAREN`.
- TYPES
Direct translations of token types between the two lexers; both constants are exercised in the sketch below.
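A minimal sketch of how these constants come into play, assuming they are reachable from outside the class and that `:INTEGER => :tINTEGER` is a representative entry of TYPES:

LEXER = Prism::Translation::Parser::Lexer

# Direct type translation through TYPES.
LEXER::TYPES.fetch(:INTEGER) # => :tINTEGER

# Deciding between kDO and kDO_LAMBDA: walk the already-emitted token types
# backwards and find the nearest lambda-related token. Only a tLAMBDA turns
# the following `do` into a kDO_LAMBDA.
emitted = [:tLAMBDA, :tIDENTIFIER]
nearest = emitted.reverse.find { |type| LEXER::LAMBDA_TOKEN_TYPES.include?(type) }
do_type = nearest == :tLAMBDA ? :kDO_LAMBDA : :kDO # => :kDO_LAMBDA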
Attributes
lexed[R]
An array of tuples that contain prism tokens and their associated lex state when they were lexed.
offset_cache[R]
A hash that maps byte offsets to character offsets.
source_buffer[R]
The Parser::Source::Buffer that the tokens were lexed from.
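Prism reports byte offsets, while the parser gem expects character offsets; the two diverge as soon as the source contains multibyte characters. An illustration of the mapping the cache provides (the exact representation is an assumption based on the description above):

source = "é = 1" # "é" occupies two bytes in UTF-8
# Byte offset -> character offset for this source:
# { 0 => 0, 2 => 1, 3 => 2, 4 => 3, 5 => 4, 6 => 5 }
source.byteslice(3, 1) # => "=" (byte offset 3)
source[2]              # => "=" (character offset 2)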
Public Class Methods
new(source_buffer, lexed, offset_cache)
Initialize the lexer with the given source buffer, prism tokens, and offset cache.
# File prism/translation/parser/lexer.rb, line 217
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer = source_buffer
  @lexed = lexed
  @offset_cache = offset_cache
end
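In practice the lexer is built by the translation layer rather than by hand. A hedged sketch of the usual entry point, which requires the parser gem to be installed (the token values shown are illustrative):

require "prism"
require "prism/translation/parser33"

buffer = Parser::Source::Buffer.new("(example)", source: "-> { 1 }.call")

# tokenize is part of the parser gem's Parser::Base interface; its third
# element is the token list this lexer produced via to_a.
ast, comments, tokens = Prism::Translation::Parser33.new.tokenize(buffer)
tokens.first # => [:tLAMBDA, ["->", #<Parser::Source::Range (example) 0...2>]]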
Public Instance Methods
to_a()
Convert the prism tokens into the format expected by the parser gem.
# File prism/translation/parser/lexer.rb, line 227
def to_a
  tokens = []

  index = 0
  length = lexed.length

  heredoc_identifier_stack = []

  while index < length
    token, state = lexed[index]
    index += 1
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

    type = TYPES.fetch(token.type)
    value = token.value
    location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])

    case type
    when :kDO
      types = tokens.map(&:first)
      nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }

      if nearest_lambda_token_type == :tLAMBDA
        type = :kDO_LAMBDA
      end
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        start_index = index

        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end

        if start_index != index
          value += next_token.value
          location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
          index += 1
        end
      else
        value.chomp!
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
      end

      value = parse_integer(value)
    when :tLABEL
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
    when :tLPAREN2
      type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
    when :tNTH_REF
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end

      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      unless (lines = token.value.lines).one?
        start_offset = offset_cache[token.location.start_offset]
        lines.map do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp

          if match = chomped_line.match(/(?<backslashes>\\+)\z/)
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)

            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end

          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
          start_offset = end_offset
        end
        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
      elsif token.type == :REGEXP_END
        value = value[0]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
      end
    when :tSYMBEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
        type = :tIDENTIFIER
      end
    when :tXSTRING_BEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
    end
  end

  tokens
end
Private Instance Methods
parse_complex(value)
Parse a complex number from the string representation.
# File prism/translation/parser/lexer.rb, line 409
def parse_complex(value)
  value.chomp!("i")

  if value.end_with?("r")
    Complex(0, parse_rational(value))
  elsif value.start_with?(/0[BbOoDdXx]/)
    Complex(0, parse_integer(value))
  else
    Complex(0, value)
  end
rescue ArgumentError
  0i
end
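A sketch of the expected behavior; parse_complex never touches the buffer or offsets, so stand-in constructor arguments suffice, and send is used only because the method is private:

lexer = Prism::Translation::Parser::Lexer.new(nil, [], {})
lexer.send(:parse_complex, "3i")    # => (0+3i)
lexer.send(:parse_complex, "3ri")   # => (0+(3/1)*i) rational imaginary part
lexer.send(:parse_complex, "0x10i") # => (0+16i)     radix-prefixed literal
lexer.send(:parse_complex, "oops")  # => (0+0i)      falls back to 0i on ArgumentError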
parse_float(value)
Parse a float from the string representation.
# File prism/translation/parser/lexer.rb, line 402
def parse_float(value)
  Float(value)
rescue ArgumentError
  0.0
end
parse_integer(value)
Parse an integer from the string representation.
# File prism/translation/parser/lexer.rb, line 395
def parse_integer(value)
  Integer(value)
rescue ArgumentError
  0
end
parse_rational(value)
Parse a rational from the string representation.
# File prism/translation/parser/lexer.rb, line 424
def parse_rational(value)
  value.chomp!("r")

  if value.start_with?(/0[BbOoDdXx]/)
    Rational(parse_integer(value))
  else
    Rational(value)
  end
rescue ArgumentError
  0r
end
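The integer and rational helpers follow the same pattern of delegating to the Kernel conversion methods and falling back on ArgumentError; reusing the stand-in lexer from the parse_complex sketch:

lexer.send(:parse_integer, "0b101") # => 5
lexer.send(:parse_rational, "3r")   # => (3/1)
lexer.send(:parse_rational, "0x2r") # => (2/1) radix literals go through parse_integer
lexer.send(:parse_integer, "junk")  # => 0     invalid input falls back to 0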