class RDoc::Markup::Parser

用于 RDoc 标记的递归下降解析器。

该解析器将输入字符串标记化,然后将标记解析为文档。可以通过编写诸如 RDoc::Markup::ToHTML 之类的访问器将文档转换为输出格式。

该解析器仅处理块级构造,例如 Paragraph、List、ListItem、Heading、Verbatim、BlankLine、Rule 和 BlockQuote。诸如 +blah+ 之类的内联标记由 RDoc::Markup::AttributeManager 单独处理。

要了解 Parser 实现了哪些标记,请阅读 RDoc。要了解如何使用 RDoc 标记来格式化程序中的文本,请阅读 RDoc::Markup。

常量

LIST_TOKENS

列表标记类型

属性

debug[RW]

启用调试信息的显示

tokens[R]

标记访问器

公共类方法

new() 点击以切换源代码

创建一个新的 Parser。另请参阅 ::parse

# File rdoc/markup/parser.rb, line 79
# Creates a parser in its idle state: no scanner and no current token yet,
# debug output switched off, and an empty token stream.
def initialize
  @binary_input = nil
  @current_token = nil
  @debug = false
  @s = nil
  @tokens = []
end
parse(str) 点击以切换源代码

将 str 解析为文档。

请使用 RDoc::Markup#parse 代替此方法。

# File rdoc/markup/parser.rb, line 60
# Tokenizes +str+ with a fresh parser, then parses the tokens into a new
# RDoc::Markup::Document.
#
# Returns whatever the parser's #parse returns for that document.
def self.parse str
  new.tap { |parser| parser.tokenize str }
     .parse(RDoc::Markup::Document.new)
end
tokenize(str) 点击以切换源代码

返回 str 的标记流,用于测试

# File rdoc/markup/parser.rb, line 70
# Tokenizes +str+ with a fresh parser and returns that parser's token
# stream.
def self.tokenize str
  new.tap { |parser| parser.tokenize str }.tokens
end

公共实例方法

build_heading(level) 点击以切换源代码

构建 level 级别的标题

# File rdoc/markup/parser.rb, line 90
# Builds a Heading of the given +level+.
#
# When the next token is TEXT it becomes the heading text and the NEWLINE
# that follows it is consumed.  Any other token is pushed back onto the
# stream and the heading text is empty.
def build_heading level
  kind, heading_text, = get

  if kind == :TEXT then
    skip :NEWLINE
  else
    unget
    heading_text = ''
  end

  RDoc::Markup::Heading.new level, heading_text
end
build_list(margin) 点击以切换源代码

构建与 margin 对齐的列表

# File rdoc/markup/parser.rb, line 108
# Builds a List whose item markers sit at column +margin+.
#
# Tokens are consumed while they are list markers (LIST_TOKENS) at or to
# the right of +margin+ and of the same type as the first marker seen.  Any
# other token is pushed back and ends the list.
#
# For LABEL and NOTE lists, consecutive labels that have no description are
# collected into one array and attached to the next item that does have a
# description.
#
# Returns the List.  Returns nil when no item was built and there is either
# no pending label or the list type is not LABEL/NOTE.
def build_list margin
  p :list_start => margin if @debug

  list = RDoc::Markup::List.new
  # labels gathered from LABEL/NOTE markers that had an empty description
  label = nil

  until @tokens.empty? do
    type, data, column, = get

    case type
    when *LIST_TOKENS then
      # a dedented marker, or a marker of another list type, ends this list
      if column < margin || (list.type && list.type != type) then
        unget
        break
      end

      list.type = type
      # from here on +column+ is the column of the token AFTER the marker;
      # it is passed to #parse below as the indent of the item's content
      peek_type, _, column, = peek_token

      case type
      when :NOTE, :LABEL then
        label = [] unless label

        if peek_type == :NEWLINE then
          # description not on the same line as LABEL/NOTE
          # skip the trailing newline & any blank lines below
          while peek_type == :NEWLINE
            get
            peek_type, _, column, = peek_token
          end

          # we may be:
          #   - at end of stream
          #   - at a column < margin:
          #         [text]
          #       blah blah blah
          #   - at the same column, but with a different type of list item
          #       [text]
          #       * blah blah
          #   - at the same column, with the same type of list item
          #       [one]
          #       [two]
          # In all cases, we have an empty description.
          # In the last case only, we continue.
          #
          # +empty+ is true (empty, stop), :continue (empty, keep collecting
          # labels) or false (a description follows)
          if peek_type.nil? || column < margin then
            empty = true
          elsif column == margin then
            case peek_type
            when type
              empty = :continue
            when *LIST_TOKENS
              empty = true
            else
              empty = false
            end
          else
            empty = false
          end

          if empty then
            label << data
            next if empty == :continue
            # NOTE(review): the pending label is only turned into an item
            # after the loop when the list is still empty; if earlier items
            # exist it looks like this label is dropped -- confirm intended
            break
          end
        end
      else
        # all other list types build their items without a label
        data = nil
      end

      # attach every label collected so far, plus this one, to the item
      if label then
        data = label << data
        label = nil
      end

      list_item = RDoc::Markup::ListItem.new data
      parse list_item, column
      list << list_item

    else
      unget
      break
    end
  end

  p :list_end => margin if @debug

  # only labels with empty descriptions were seen: emit a single item that
  # carries the labels and a blank line as its content
  if list.empty? then
    return nil unless label
    return nil unless [:LABEL, :NOTE].include? list.type

    list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
    list << list_item
  end

  list
end
build_paragraph(margin) 点击以切换源代码

构建与 margin 对齐的段落

# File rdoc/markup/parser.rb, line 208
# Builds a Paragraph from consecutive TEXT tokens that start at column
# +margin+.
#
# Each TEXT token becomes one part of the paragraph.  A BREAK token after
# the text ends the paragraph.  Otherwise the NEWLINE after the text is
# consumed, and a joining space is appended to the line when it ends with a
# character matching SPACE_SEPARATED_LETTER_CLASS (defined elsewhere).  Any
# other token is pushed back and ends the paragraph.
#
# Returns the Paragraph.  It has no parts when the stream is empty or the
# first token is not TEXT at +margin+.
#
# Raises ParseError (via #skip) when a TEXT token is followed by something
# other than NEWLINE, BREAK or the end of the stream.
def build_paragraph margin
  p :paragraph_start => margin if @debug

  paragraph = RDoc::Markup::Paragraph.new

  until @tokens.empty? do
    type, data, column, = get

    if type == :TEXT and column == margin then
      paragraph << data

      break if peek_token.first == :BREAK

      # +data+ is mutated after being appended; this relies on the paragraph
      # holding the same String object -- presumably true for
      # RDoc::Markup::Paragraph, verify if that class changes
      data << ' ' if skip :NEWLINE and /#{SPACE_SEPARATED_LETTER_CLASS}\z/o.match?(data)
    else
      unget
      break
    end
  end

  # cleanup: drop the joining space left on the final line.  The paragraph
  # may have no parts at all, so guard against a nil last part.
  paragraph.parts.last&.sub!(/ \z/, '')

  p :paragraph_end => margin if @debug

  paragraph
end
build_verbatim(margin) 点击以切换源代码

构建从 margin 缩进的 Verbatim。

verbatim 块向左移动(缩进最少的行从第 0 列开始)。verbatim 的每个部分都是一行文本,始终以换行符结尾。空行始终由单个换行符组成,并且在 verbatim 的末尾永远不会有单个换行符。

# File rdoc/markup/parser.rb, line 243
# Builds a Verbatim from the tokens indented deeper than +margin+.
#
# Tokens are turned back into text line by line: each NEWLINE token ends
# the current line and appends it to the verbatim, and the first token of a
# line contributes leading spaces equal to its column minus +margin+.  A
# non-NEWLINE token at a column <= +margin+ is pushed back and ends the
# block.
#
# Afterwards the smallest indentation seen is stripped from every part
# other than a bare "\n", and Verbatim#normalize is called.
#
# Returns the Verbatim.  It has no parts when the stream is empty.
def build_verbatim margin
  p :verbatim_begin => margin if @debug
  verbatim = RDoc::Markup::Verbatim.new

  # smallest indentation (relative to margin) of any line; nil until the
  # first indented token has been seen
  min_indent = nil
  generate_leading_spaces = true
  line = ''.dup

  until @tokens.empty? do
    type, data, column, = get

    if type == :NEWLINE then
      line << data
      verbatim << line
      line = ''.dup
      generate_leading_spaces = true
      next
    end

    if column <= margin
      unget
      break
    end

    if generate_leading_spaces then
      indent = column - margin
      line << ' ' * indent
      min_indent = indent if min_indent.nil? || indent < min_indent
      generate_leading_spaces = false
    end

    # Rebuild the source text of the token.  For markers, the gap up to the
    # next token's column is restored as spaces.
    case type
    when :HEADER then
      # data is the heading level, i.e. the number of '=' characters
      line << '=' * data
      _, _, peek_column, = peek_token
      peek_column ||= column + data
      indent = peek_column - column - data
      line << ' ' * indent
    when :RULE then
      # the tokenizer stores the dash count minus two
      width = 2 + data
      line << '-' * width
      _, _, peek_column, = peek_token
      peek_column ||= column + width
      indent = peek_column - column - width
      line << ' ' * indent
    when :BREAK, :TEXT then
      line << data
    when :BLOCKQUOTE then
      line << '>>>'
      peek_type, _, peek_column = peek_token
      if peek_type != :NEWLINE and peek_column
        line << ' ' * (peek_column - column - 3)
      end
    else # *LIST_TOKENS
      list_marker = case type
                    when :BULLET then data
                    when :LABEL  then "[#{data}]"
                    when :NOTE   then "#{data}::"
                    else # :LALPHA, :NUMBER, :UALPHA
                      "#{data}."
                    end
      line << list_marker
      peek_type, _, peek_column = peek_token
      unless peek_type == :NEWLINE then
        peek_column ||= column + list_marker.length
        indent = peek_column - column - list_marker.length
        line << ' ' * indent
      end
    end

  end

  # the stream ended in the middle of a line: terminate it
  verbatim << line << "\n" unless line.empty?

  # min_indent stays nil when no indented token was consumed (empty stream
  # or blank lines only); there is nothing to strip in that case
  if min_indent && min_indent > 0 then
    verbatim.parts.each { |part| part.slice!(0, min_indent) unless part == "\n" }
  end

  verbatim.normalize

  p :verbatim_end => margin if @debug

  verbatim
end
get() 点击以切换源代码

从流中拉取下一个标记。

# File rdoc/markup/parser.rb, line 327
# Removes the first token from the stream, remembers it as the current
# token and returns it.  Returns nil when the stream is empty.
def get
  @tokens.shift.tap do |token|
    @current_token = token
    p :get => token if @debug
  end
end
parse(parent, indent = 0) 点击以切换源代码

将标记解析为 RDoc::Markup::XXX 对象数组,并将它们附加到传递的 parent RDoc::Markup::YYY 对象。

在标记流结束时退出,或者当它遇到列小于 indent 的标记时退出(除非它是 NEWLINE)。

返回 parent

# File rdoc/markup/parser.rb, line 342
# Parses the token stream into block-level objects and appends them to
# +parent+.
#
# Stops at the end of the stream, or at the first non-NEWLINE/BREAK token
# whose column is less than +indent+ (that token is pushed back).  Tokens
# indented deeper than +indent+ are handed to #build_verbatim.
#
# Returns +parent+.
#
# Raises ParseError for a token type that is not handled here.
def parse parent, indent = 0
  p :parse_start => indent if @debug

  until @tokens.empty? do
    type, data, column, = get

    # A NEWLINE seen here is a blank line, because the builders consume the
    # newline that ends their own line.  BREAK is treated the same way.
    if type == :BREAK || type == :NEWLINE then
      parent << RDoc::Markup::BlankLine.new
      skip :NEWLINE, false
      next
    end

    # dedent: this token belongs to an enclosing block
    if column < indent then
      unget
      break
    end

    # deeper indentation: verbatim text
    if column > indent then
      unget
      parent << build_verbatim(indent)
      next
    end

    # the token sits exactly at the current indentation
    case type
    when :HEADER then
      parent << build_heading(data)
    when :RULE then
      parent << RDoc::Markup::Rule.new(data)
      skip :NEWLINE
    when :TEXT then
      unget
      parse_text parent, indent
    when :BLOCKQUOTE then
      # discard the rest of the marker line, including its NEWLINE
      loop do
        skipped_type, = get
        break unless skipped_type and skipped_type != :NEWLINE
      end

      # the quoted content is parsed at the column of the next token
      _, _, quote_column, = peek_token
      quote = RDoc::Markup::BlockQuote.new
      p :blockquote_start => [data, quote_column] if @debug
      parse quote, quote_column
      p :blockquote_end => indent if @debug
      parent << quote
    when *LIST_TOKENS then
      unget
      parent << build_list(indent)
    else
      bad_type, bad_data, bad_column, bad_line = @current_token
      raise ParseError, "Unhandled token #{bad_type} (#{bad_data.inspect}) at #{bad_line}:#{bad_column}"
    end
  end

  p :parse_end => indent if @debug

  parent

end
peek_token() 点击以切换源代码

返回流中的下一个标记,而不修改流

# File rdoc/markup/parser.rb, line 413
# Returns the first token of the stream without removing it.  Returns an
# empty Array when the stream is empty, so callers can still destructure
# the result.
def peek_token
  (@tokens.first || []).tap { |upcoming| p :peek => upcoming if @debug }
end
setup_scanner(input) 点击以切换源代码

创建 StringScanner

# File rdoc/markup/parser.rb, line 468
# Stores a new MyStringScanner over +input+ in @s and returns it.
def setup_scanner input
  @s = MyStringScanner.new(input)
end
skip(token_type, error = true) 点击以切换源代码

如果下一个标记的类型是 token_type,则跳过它。

如果下一个标记不是预期类型,可以选择引发错误。

# File rdoc/markup/parser.rb, line 477
# Consumes the next token when its type is +token_type+ and returns it.
#
# Returns nil at the end of the stream.  When the next token has another
# type it is pushed back; ParseError is then raised if +error+ is true,
# otherwise nil is returned.
def skip token_type, error = true
  found, = get
  return unless found # end of stream

  if token_type == found then
    @current_token
  else
    unget
    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
  end
end
tokenize(input) 点击以切换源代码

将文本 input 转换为标记流

# File rdoc/markup/parser.rb, line 488
# Turns the text +input+ into a stream of tokens appended to @tokens.
#
# Every token is an Array that starts with the type and the data, followed
# by the scanner position captured before the token was scanned.  #parse
# reads that position as column then line.
#
# The +when+ branches are tried in order, so the first matching pattern
# decides the token type; anything unmatched becomes TEXT.
#
# Returns self.
#
# Raises ParseError if an ordered-list label matches none of the expected
# character classes (flagged as a BUG in the message).
def tokenize input
  setup_scanner input

  until @s.eos? do
    pos = @s.pos

    # leading spaces will be reflected by the column of the next token
    # the only thing we lose are trailing spaces at the end of the file
    next if @s.scan(/ +/)

    # note: after BULLET, LABEL, etc.,
    # indent will be the column of the next non-newline token

    @tokens << case
               # [CR]LF => :NEWLINE
               when @s.scan(/\r?\n/) then
                 token = [:NEWLINE, @s.matched, *pos]
                 # presumably updates the scanner's line bookkeeping --
                 # MyStringScanner is defined elsewhere, confirm there
                 @s.newline!
                 token
               # === text => :HEADER then :TEXT
               when @s.scan(/(=+)(\s*)/) then
                 level = @s[1].length
                 header = [:HEADER, level, *pos]

                 if @s[2] =~ /^\r?\n/ then
                   # the header has no text: give the whitespace back so
                   # the line end is tokenized as NEWLINE on the next pass
                   @s.unscan(@s[2])
                   header
                 else
                   pos = @s.pos
                   @s.scan(/.*/)
                   # HEADER is pushed here; the TEXT token below is pushed
                   # by the enclosing <<
                   @tokens << header
                   [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                 end
               # --- (at least 3) and nothing else on the line => :RULE
               # the data is the number of dashes minus two
               when @s.scan(/(-{3,}) *\r?$/) then
                 [:RULE, @s[1].length - 2, *pos]
               # * or - followed by white space and text => :BULLET
               when @s.scan(/([*-]) +(\S)/) then
                 # give back the first character of the item text
                 @s.unscan(@s[2])
                 [:BULLET, @s[1], *pos]
               # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
               when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                 # FIXME if tab(s), the column will be wrong
                 # either support tabs everywhere by first expanding them to
                 # spaces, or assume that they will have been replaced
                 # before (and provide a check for that at least in debug
                 # mode)
                 list_label = @s[1]
                 @s.unscan(@s[2])
                 list_type =
                   case list_label
                   when /[a-z]/ then :LALPHA
                   when /[A-Z]/ then :UALPHA
                   when /\d/    then :NUMBER
                   else
                     raise ParseError, "BUG token #{list_label}"
                   end
                 [list_type, list_label, *pos]
               # [text] followed by spaces or end of line => :LABEL
               when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                 [:LABEL, @s[1], *pos]
               # text:: followed by spaces or end of line => :NOTE
               when @s.scan(/(.*?)::( +|\r?$)/) then
                 [:NOTE, @s[1], *pos]
               # >>> optionally followed by a single word, then end of
               # line => :BLOCKQUOTE (the word, if any, is the token data
               # and is also given back to the scanner)
               when @s.scan(/>>> *(\w+)?$/) then
                 if word = @s[1]
                   @s.unscan(word)
                 end
                 [:BLOCKQUOTE, word, *pos]
               # anything else: :TEXT
               else
                 # two trailing spaces at the end of the line are split
                 # off as a :BREAK token placed right after the text
                 @s.scan(/(.*?)(  )?\r?$/)
                 token = [:TEXT, @s[1], *pos]

                 if @s[2] then
                   @tokens << token
                   [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
                 else
                   token
                 end
               end
  end

  self
end
unget() 点击以切换源代码

将当前标记返回到标记流

# File rdoc/markup/parser.rb, line 578
# Puts the current token back at the front of the token stream.
#
# Does nothing when there is no current token, for example after #get hit
# the end of the stream.  Returns the token stream after pushing back, or
# nil when nothing was pushed back.
#
# Raises Error when the current token is already at the front of the
# stream, which means #unget was called twice for the same token.
def unget
  token = @current_token
  p :unget => token if @debug

  # Check for nil first: with an empty stream @tokens.first is nil too, so
  # the comparison below would report a bogus double unget.
  return unless token

  raise Error, 'too many #ungets' if token == @tokens.first
  @tokens.unshift token
end