class REXML::Parsers::BaseParser

使用 Pull 解析器

此 API 是实验性的,可能会发生更改。

parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
while parser.has_next?
  res = parser.next
  puts res[1]['att'] if res.start_tag? and res[0] == 'b'
end

有关结果内容的信息,请参阅 PullEvent 类。数据与传递给 StreamListener API 的各种事件的参数相同。

请注意,解析错误会作为事件返回,需要像下面这样自行检查:

parser = PullParser.new( "<a>BAD DOCUMENT" )
while parser.has_next?
  res = parser.next
  raise res[1] if res.error?
end

Nat Price 为我提供了关于 API 的一些好主意。

常量

ATTDEF
ATTDEF_RE
ATTLISTDECL_PATTERN
ATTLISTDECL_START
ATTRIBUTE_PATTERN
ATTTYPE
ATTVALUE
CDATA_END
CDATA_PATTERN
CDATA_START
CLOSE_MATCH
COMBININGCHAR
COMMENT_PATTERN
COMMENT_START
DEFAULTDECL
DEFAULT_ENTITIES
DIGIT
DOCTYPE_END
DOCTYPE_START
ELEMENTDECL_PATTERN
ELEMENTDECL_START
ENCODING
ENTITYDECL
ENTITYDEF
ENTITYVALUE
ENTITY_START
ENUMERATEDTYPE
ENUMERATION
EREFERENCE
EXTENDER
EXTERNALID
EXTERNAL_ID_PUBLIC
EXTERNAL_ID_SYSTEM
GEDECL
INSTRUCTION_PATTERN
INSTRUCTION_START
LETTER
NAME
NAMECHAR
NCNAME_STR
NDATADECL
NMTOKEN
NMTOKENS
NOTATIONDECL_START
NOTATIONTYPE
PEDECL
PEDEF
PEREFERENCE
PUBIDCHAR

Entity 常量

PUBIDLITERAL
PUBLIC_ID
QNAME
QNAME_STR
REFERENCE
REFERENCE_RE
STANDALONE
SYSTEMENTITY
SYSTEMLITERAL
TAG_MATCH
TEXT_PATTERN
UNAME_STR

仅用于向后兼容。例如,kramdown 使用它。它在 REXML 中未使用。

VERSION
XMLDECL_PATTERN
XMLDECL_START

属性

entity_expansion_count[R]
entity_expansion_limit[W]
entity_expansion_text_limit[W]
source[R]

公共类方法

new( source ) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 163
# Builds a parser over +source+.
#
# +source+ is handed to #stream=, which assigns @source (via SourceFactory)
# and calls #reset. The entity-expansion limits are read from Security when
# the parser is constructed.
def initialize(source)
  self.stream = source

  # Entity-expansion bookkeeping.
  @entity_expansion_limit = Security.entity_expansion_limit
  @entity_expansion_text_limit = Security.entity_expansion_text_limit
  @entity_expansion_count = 0

  @prefixes = Set.new
  @listeners = []

  @source.ensure_buffer
end

公共实例方法

add_listener( listener ) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 173
# Registers +listener+; #pull calls +receive+ on every registered listener
# for each event it produces. Returns the listener list.
def add_listener(listener)
  @listeners.push(listener)
end
empty?() 点击以切换源代码

如果没有更多事件,则返回 true

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 208
# Returns true if there are no more events: the source is empty and no
# events are queued on the stack.
def empty?
  @source.empty? && @stack.empty?
end
entity( reference, entities ) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 540
# Looks up +reference+ in +entities+ and returns its unnormalized value.
#
# Returns nil when +entities+ is not given or has no value for +reference+.
# A successful lookup is counted as one entity expansion before the value
# itself is unnormalized.
def entity(reference, entities)
  expansion = entities ? entities[reference] : nil
  return if expansion.nil?

  record_entity_expansion
  unnormalize(expansion, entities)
end
has_next?() 点击以切换源代码

如果还有更多事件,则返回 true。与 !empty? 同义

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 213
# Returns true if there are more events: either the source still has
# content or events are queued on the stack. The negation of #empty?.
def has_next?
  !@source.empty? || !@stack.empty?
end
normalize( input, entities=nil, entity_filter=nil ) 点击以切换源代码

转义所有可能的实体

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 551
# Escapes all possible entities in +input+ and returns the escaped copy.
#
# input::         the text to escape; it is not modified.
# entities::      optional mapping of entity name => replacement text. Each
#                 occurrence of a value is replaced by "&name;".
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted from +entities+.
#
# Bare ampersands (those matching EREFERENCE) become "&amp;", and the
# characters covered by DEFAULT_ENTITIES are replaced by their references.
def normalize( input, entities=nil, entity_filter=nil )
  # dup (not clone) so that a frozen input still yields a mutable copy.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  if entities
    entities.each do |key, value|
      # The filter holds entity names, so it is checked against the key.
      next if entity_filter && entity_filter.include?(key)
      copy.gsub!( value, "&#{key};" )
    end
  end
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each_value do |patterns|
    # patterns[3] matches the raw character, patterns[1] is its reference.
    copy.gsub!( patterns[3], patterns[1] )
  end
  copy
end
peek(depth=0) 点击以切换源代码

查看堆栈中深度为 depth 的事件。堆栈中的第一个元素深度为 0。如果 depth 为 -1,则会解析到输入流的末尾并返回最后一个事件,该事件始终为 :end_document。请注意,这会导致解析流直到第 depth 个事件,因此您可以使用此方法有效地预解析整个文档(将整个内容拉入内存)。

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 229
# Returns the event at position +depth+ of the event stack without
# consuming it; the first element has depth 0.
#
# Events are pulled and appended to the stack until enough are available.
# With +depth+ == -1 events are pulled until #empty? is true and the last
# stacked event is returned. Raises RuntimeError when +depth+ < -1.
def peek(depth=0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1

  pulled = []
  if depth == -1
    pulled << pull until empty?
  else
    wanted = depth + 1
    pulled << pull while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
position() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 198
# Returns the source's position when the source exposes one.
def position
  # FIXME: sources without #position always report 0.
  @source.respond_to?(:position) ? @source.position : 0
end
pull() 点击以切换源代码

返回下一个事件。这是一个 PullEvent 对象。

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 244
# Returns the next event after passing it to every registered listener's
# +receive+ method. Already-parsed content is dropped from the source first.
def pull
  @source.drop_parsed_content

  event = pull_event
  @listeners.each { |listener| listener.receive(event) }
  event
end
reset() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 187
# Puts the parser back into its initial state: no pending close tag, no
# root element seen, no document status, empty tag/event/entity stacks, and
# only the built-in "xml" prefix bound.
def reset
  # Document-level state.
  @document_status = nil
  @have_root = false
  @closed = nil

  # Stacks and tables.
  @tags = []
  @stack = []
  @entities = []

  # Namespace bindings.
  @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
  @namespaces_restore_stack = []
end
stream=( source ) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 182
# Replaces the input: wraps +source+ with SourceFactory.create_from and
# resets all parser state so parsing starts fresh.
def stream=(source)
  @source = SourceFactory.create_from(source)
  reset
end
unnormalize( string, entities=nil, filter=nil ) 点击以切换源代码

取消转义所有可能的实体

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 567
# Unescapes all possible entities in +string+ and returns the result as a
# new string; +string+ itself is not modified.
#
# string::   the text to unescape.
# entities:: optional mapping used by #entity to resolve references.
# filter::   optional collection of reference names that must be left
#            unexpanded.
#
# Raises RuntimeError when the expanded text grows beyond
# @entity_expansion_text_limit bytes; #entity / #record_entity_expansion may
# also raise when the expansion count limit is exceeded.
def unnormalize( string, entities=nil, filter=nil )
  # Work on a copy; when the input contains "\r", matches of
  # Private::CARRIAGE_RETURN_NEWLINE_PATTERN are replaced with "\n".
  if string.include?("\r")
    rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
  else
    rv = string.dup
  end
  # Collect references up front; with none there is nothing to do.
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Replace character references: a leading "x" means hexadecimal,
  # anything else is parsed as decimal.
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
    m=$1
    if m.start_with?("x")
      code_point = Integer(m[1..-1], 16)
    else
      code_point = Integer(m, 10)
    end
    [code_point].pack('U*')
  }
  # Keep only the first capture of each match, dropping nil entries.
  matches.collect!{|x|x[0]}.compact!
  if filter
    matches.reject! do |entity_reference|
      filter.include?(entity_reference)
    end
  end
  if matches.size > 0
    # Each distinct reference is resolved once; n is its occurrence count.
    matches.tally.each do |entity_reference, n|
      entity_expansion_count_before = @entity_expansion_count
      entity_value = entity( entity_reference, entities )
      if entity_value
        if n > 1
          # Charge the expansions recorded for one occurrence again for
          # each of the remaining n - 1 occurrences.
          entity_expansion_count_delta =
            @entity_expansion_count - entity_expansion_count_before
          record_entity_expansion(entity_expansion_count_delta * (n - 1))
        end
        re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
        rv.gsub!( re, entity_value )
        if rv.bytesize > @entity_expansion_text_limit
          raise "entity expansion has grown too large"
        end
      else
        # Not resolvable through +entities+: fall back to DEFAULT_ENTITIES.
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    # "&amp;" is replaced after all other references — presumably so the
    # ampersands it produces are not re-read as references; confirm.
    rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
  end
  rv
end
unshift(token) 点击以切换源代码

将事件推回流的头部。此方法(理论上)具有无限深度。

# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 219
# Pushes +token+ back onto the head of the event stack so that it is the
# next event returned. Returns the stack.
def unshift(token)
  @stack.prepend(token)
end

私有实例方法

add_namespace(prefix, uri) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 616
# Binds +prefix+ to +uri+ (or unbinds it when +uri+ is nil).
#
# The previous binding of +prefix+ (possibly nil) is first saved in the
# top frame of @namespaces_restore_stack.
def add_namespace(prefix, uri)
  restore_frame = @namespaces_restore_stack.last
  restore_frame[prefix] = @namespaces[prefix]

  return @namespaces.delete(prefix) if uri.nil?

  @namespaces[prefix] = uri
end
need_source_encoding_update?(xml_declaration_encoding) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 649
# Tells whether the source encoding should be switched to the encoding named
# in the XML declaration: false when none is declared or when it is exactly
# "UTF-16" (case-insensitive), true otherwise.
def need_source_encoding_update?(xml_declaration_encoding)
  return false if xml_declaration_encoding.nil?

  !/\AUTF-16\z/i.match?(xml_declaration_encoding)
end
parse_attributes(prefixes) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 769
# Parses the attribute list of a start tag, consuming input from @source up
# to and including the closing ">" or "/>".
#
# prefixes:: collection that receives every attribute prefix used, other
#            than "xml" and "xmlns".
#
# Returns [attributes, closed]: +attributes+ maps each qualified attribute
# name to its raw value, and +closed+ is true when the tag ended with "/>".
#
# Namespace declarations (xmlns:*) are registered through #add_namespace.
# Raises REXML::ParseException for a missing "=", missing or unterminated
# quotes, illegal xml/xmlns bindings, duplicate attributes, two attributes
# that expand to the same [namespace URI, local name], or an invalid name.
def parse_attributes(prefixes)
  attributes = {}
  # Maps [uri, local_part] => prefix, to detect expanded-name collisions.
  expanded_names = {}
  closed = false
  while true
    if @source.match?(">", true)
      return attributes, closed
    elsif @source.match?("/>", true)
      closed = true
      return attributes, closed
    elsif match = @source.match(QNAME, true)
      name = match[1]
      prefix = match[2]
      local_part = match[3]

      unless @source.match?(/\s*=\s*/um, true)
        message = "Missing attribute equal: <#{name}>"
        raise REXML::ParseException.new(message, @source)
      end
      unless match = @source.match(/(['"])/, true)
        message = "Missing attribute value start quote: <#{name}>"
        raise REXML::ParseException.new(message, @source)
      end
      quote = match[1]
      start_position = @source.position
      value = @source.read_until(quote)
      # chomp! returns nil when the closing quote was not found.
      unless value.chomp!(quote)
        # Rewind so the error is reported at the start of the value.
        @source.position = start_position
        message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
        raise REXML::ParseException.new(message, @source)
      end
      @source.match?(/\s*/um, true)
      if prefix == "xmlns"
        if local_part == "xml"
          if value != Private::XML_PREFIXED_NAMESPACE
            msg = "The 'xml' prefix must not be bound to any other namespace "+
              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
            raise REXML::ParseException.new( msg, @source, self )
          end
        elsif local_part == "xmlns"
          msg = "The 'xmlns' prefix must not be declared "+
            "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self)
        end
        add_namespace(local_part, value)
      elsif prefix
        prefixes << prefix unless prefix == "xml"
      end

      if attributes[name]
        msg = "Duplicate attribute #{name.inspect}"
        raise REXML::ParseException.new(msg, @source, self)
      end

      # Namespace declarations themselves are exempt from the
      # expanded-name uniqueness check.
      unless prefix == "xmlns"
        uri = @namespaces[prefix]
        expanded_name = [uri, local_part]
        existing_prefix = expanded_names[expanded_name]
        if existing_prefix
          message = "Namespace conflict in adding attribute " +
                    "\"#{local_part}\": " +
                    "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
                    "prefix \"#{prefix}\" = \"#{uri}\""
          raise REXML::ParseException.new(message, @source, self)
        end
        expanded_names[expanded_name] = prefix
      end

      attributes[name] = value
    else
      # Report the offending token: the buffer up to the next "/", ">" or
      # whitespace.
      message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
      raise REXML::ParseException.new(message, @source)
    end
  end
end
parse_id(base_error_message, accept_external_id:, accept_public_id:) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 668
# Parses an external or public identifier from @source.
#
# base_error_message:: prefix for the message of the raised exception.
# accept_external_id:: allow 'PUBLIC "pubid" "system"' and 'SYSTEM "system"'.
# accept_public_id::   allow 'PUBLIC "pubid"' without a system literal.
#
# Returns [type, pubid, system] where +type+ is "PUBLIC" or "SYSTEM" and the
# literals have their surrounding quotes removed (nil when absent).
# Raises REXML::ParseException when no accepted form matches.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  # Strips the surrounding quote characters from a matched literal.
  unquote = ->(literal) { literal ? literal[1..-2] : nil }

  if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
    return ["PUBLIC", unquote.call(md[1]), unquote.call(md[2])]
  end
  if accept_public_id and (md = @source.match(PUBLIC_ID, true))
    return ["PUBLIC", unquote.call(md[1]), nil]
  end
  if accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
    return ["SYSTEM", nil, unquote.call(md[1])]
  end

  details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                     accept_public_id: accept_public_id)
  message = "#{base_error_message}: #{details}"
  raise REXML::ParseException.new(message, @source)
end
parse_id_invalid_details(accept_external_id:, accept_public_id:) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 696
# Describes why the upcoming input is not an acceptable ID.
#
# Only peeks at @source (no input is consumed) and returns a short English
# phrase that #parse_id appends to its error message.
def parse_id_invalid_details(accept_external_id:,
                             accept_public_id:)
  public_keyword = /\A\s*PUBLIC/um
  system_keyword = /\A\s*SYSTEM/um

  # Input starts with PUBLIC and some PUBLIC form is acceptable.
  if (accept_external_id or accept_public_id) and @source.match?(/#{public_keyword}/um)
    if @source.match?(/#{public_keyword}(?:\s+[^'"]|\s*[\[>])/um)
      return "public ID literal is missing"
    end
    return "invalid public ID literal" unless @source.match?(/#{public_keyword}\s+#{PUBIDLITERAL}/um)
    return "garbage after public ID literal" unless accept_public_id

    if @source.match?(/#{public_keyword}\s+#{PUBIDLITERAL}\s+[^'"]/um)
      return "system ID literal is missing"
    end
    return "invalid system literal" unless @source.match?(/#{public_keyword}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)

    return "garbage after system literal"
  end

  # Input starts with SYSTEM and external IDs are acceptable.
  if accept_external_id and @source.match?(/#{system_keyword}/um)
    if @source.match?(/#{system_keyword}(?:\s+[^'"]|\s*[\[>])/um)
      return "system literal is missing"
    end
    return "invalid system literal" unless @source.match?(/#{system_keyword}\s+#{SYSTEMLITERAL}/um)

    return "garbage after system literal"
  end

  return "invalid ID type" unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)

  "ID type is missing"
end
parse_name(base_error_message) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 655
# Consumes a name matching Private::NAME_PATTERN from @source and returns
# the matched text.
#
# Raises REXML::ParseException, prefixed with +base_error_message+, saying
# "invalid name" when non-whitespace input is present and "name is missing"
# otherwise.
def parse_name(base_error_message)
  md = @source.match(Private::NAME_PATTERN, true)
  return md[0] if md

  message =
    if @source.match?(/\S/um)
      "#{base_error_message}: invalid name"
    else
      "#{base_error_message}: name is missing"
    end
  raise REXML::ParseException.new(message, @source)
end
pop_namespaces_restore() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 631
# Pops the top frame of @namespaces_restore_stack and re-applies the saved
# bindings: a nil entry unbinds the prefix, anything else rebinds it.
# Returns the popped frame.
def pop_namespaces_restore
  @namespaces_restore_stack.pop.each do |prefix, previous_uri|
    next @namespaces.delete(prefix) if previous_uri.nil?

    @namespaces[prefix] = previous_uri
  end
end
process_instruction() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 734
# Parses the remainder of a processing instruction (the leading "<?" has
# already been consumed by the caller).
#
# Returns [:xmldecl, version, encoding, standalone] when the target is
# "xml", otherwise [:processing_instruction, name, content], where
# +content+ is nil if the instruction has no body.
# Raises ParseException for an unclosed instruction or for an XML
# declaration that is not at the start of the document.
def process_instruction
  name = parse_name("Malformed XML: Invalid processing instruction node")

  content = nil
  if @source.match?(/\s+/um, true)
    body = @source.match(/(.*?)\?>/um, true)
    unless body
      raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
    end
    content = body[1]
  elsif !@source.match?("?>", true)
    raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
  end

  return [:processing_instruction, name, content] unless name == "xml"

  if @document_status
    raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
  end

  # First capture of +pattern+ in the declaration body, or nil.
  first_capture = lambda do |pattern|
    found = pattern.match(content)
    found.nil? ? nil : found[1]
  end

  version = first_capture.call(VERSION)
  encoding = first_capture.call(ENCODING)
  @source.encoding = encoding if need_source_encoding_update?(encoding)
  # No declared encoding, but the source is UTF-16BE/LE: report "UTF-16".
  if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
    encoding = "UTF-16"
  end
  standalone = first_capture.call(STANDALONE)

  [ :xmldecl, version, encoding, standalone ]
end
pull_event() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 254
# Produces the next raw parser event as an array whose first element is the
# event symbol (for example :start_element, :end_element, :text, :comment,
# :start_doctype, :end_document).
#
# Order of work, as written below:
# 1. Emit the pending :end_element for a self-closing tag saved in @closed.
# 2. When #empty? is true, check that no DOCTYPE or element is still open
#    and return [:end_document].
# 3. Return any event queued on @stack.
# 4. Otherwise parse from @source according to @document_status: nil (before
#    any DOCTYPE/element), :in_doctype (internal subset), and then the
#    general markup/text handling.
#
# Raises REXML::ParseException on malformed input and
# REXML::UndefinedNamespaceException for an undeclared prefix; any other
# error raised while parsing markup/text is wrapped in a ParseException.
def pull_event
  # A self-closing tag was reported as :start_element on the previous call;
  # report its matching :end_element now.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  if empty?
    if @document_status == :in_doctype
      raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
    end
    unless @tags.empty?
      path = "/" + @tags.join("/")
      raise ParseException.new("Missing end tag for '#{path}'", @source)
    end
    return [ :end_document ]
  end
  # Events pushed back (see #unshift / #peek) take priority over new input.
  return @stack.shift if @stack.size > 0
  #STDERR.puts @source.encoding
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"

  @source.ensure_buffer
  # --- Prolog: nothing but declarations/comments seen so far ---
  if @document_status == nil
    start_position = @source.position
    if @source.match?("<?", true)
      return process_instruction
    elsif @source.match?("<!", true)
      if @source.match?("--", true)
        md = @source.match(/(.*?)-->/um, true)
        if md.nil?
          raise REXML::ParseException.new("Unclosed comment", @source)
        end
        # A comment body may not contain "--" nor end with "-".
        if /--|-\z/.match?(md[1])
          raise REXML::ParseException.new("Malformed comment", @source)
        end
        return [ :comment, md[1] ]
      elsif @source.match?("DOCTYPE", true)
        base_error_message = "Malformed DOCTYPE"
        unless @source.match?(/\s+/um, true)
          if @source.match?(">")
            message = "#{base_error_message}: name is missing"
          else
            message = "#{base_error_message}: invalid name"
          end
          # Rewind so the error points at the start of the declaration.
          @source.position = start_position
          raise REXML::ParseException.new(message, @source)
        end
        name = parse_name(base_error_message)
        if @source.match?(/\s*\[/um, true)
          # Internal subset follows, no external ID.
          id = [nil, nil, nil]
          @document_status = :in_doctype
        elsif @source.match?(/\s*>/um, true)
          # Bare <!DOCTYPE name>.
          id = [nil, nil, nil]
          @document_status = :after_doctype
          @source.ensure_buffer
        else
          id = parse_id(base_error_message,
                        accept_external_id: true,
                        accept_public_id: false)
          if id[0] == "SYSTEM"
            # For backward compatibility
            id[1], id[2] = id[2], nil
          end
          if @source.match?(/\s*\[/um, true)
            @document_status = :in_doctype
          elsif @source.match?(/\s*>/um, true)
            @document_status = :after_doctype
            @source.ensure_buffer
          else
            message = "#{base_error_message}: garbage after external ID"
            raise REXML::ParseException.new(message, @source)
          end
        end
        args = [:start_doctype, name, *id]
        if @document_status == :after_doctype
          # DOCTYPE without internal subset: queue its :end_doctype so it is
          # returned by the next call.
          @source.match?(/\s*/um, true)
          @stack << [ :end_doctype ]
        end
        return args
      else
        message = "Invalid XML"
        raise REXML::ParseException.new(message, @source)
      end
    end
  end
  # --- Inside the DOCTYPE internal subset ---
  if @document_status == :in_doctype
    @source.match?(/\s*/um, true) # skip spaces
    start_position = @source.position
    if @source.match?("<!", true)
      if @source.match?("ELEMENT", true)
        md = @source.match(/(.*?)>/um, true)
        raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
        return [ :elementdecl, "<!ELEMENT" + md[1] ]
      elsif @source.match?("ENTITY", true)
        match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
        unless match_data
          raise REXML::ParseException.new("Malformed entity declaration", @source)
        end
        match = [:entitydecl, *match_data.captures.compact]
        # A leading '%' marks a parameter entity; it is moved to the end of
        # the event below.
        ref = false
        if match[1] == '%'
          ref = true
          match.delete_at 1
        end
        # Now we have to sort out what kind of entity reference this is
        if match[2] == 'SYSTEM'
          # External reference
          match[3] = match[3][1..-2] # PUBID
          match.delete_at(4) if match.size > 4 # Chop out NDATA decl
          # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
        elsif match[2] == 'PUBLIC'
          # External reference
          match[3] = match[3][1..-2] # PUBID
          match[4] = match[4][1..-2] # HREF
          match.delete_at(5) if match.size > 5 # Chop out NDATA decl
          # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
        elsif Private::PEREFERENCE_PATTERN.match?(match[2])
          raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
        else
          # Internal entity: strip the quotes around the value.
          match[2] = match[2][1..-2]
          match.pop if match.size == 4
          # match is [ :entity, name, value ]
        end
        match << '%' if ref
        return match
      elsif @source.match?("ATTLIST", true)
        md = @source.match(Private::ATTLISTDECL_END, true)
        raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
        element = md[1]
        contents = md[0]

        # Collect attribute name => default value, skipping #IMPLIED ones.
        pairs = {}
        values = md[0].strip.scan( ATTDEF_RE )
        values.each do |attdef|
          unless attdef[3] == "#IMPLIED"
            attdef.compact!
            val = attdef[3]
            val = attdef[4] if val == "#FIXED "
            pairs[attdef[0]] = val
            # A defaulted xmlns:prefix attribute binds that prefix.
            if attdef[0] =~ /^xmlns:(.*)/
              @namespaces[$1] = val
            end
          end
        end
        return [ :attlistdecl, element, pairs, contents ]
      elsif @source.match?("NOTATION", true)
        base_error_message = "Malformed notation declaration"
        unless @source.match?(/\s+/um, true)
          if @source.match?(">")
            message = "#{base_error_message}: name is missing"
          else
            message = "#{base_error_message}: invalid name"
          end
          @source.position = start_position
          raise REXML::ParseException.new(message, @source)
        end
        name = parse_name(base_error_message)
        id = parse_id(base_error_message,
                      accept_external_id: true,
                      accept_public_id: true)
        unless @source.match?(/\s*>/um, true)
          message = "#{base_error_message}: garbage before end >"
          raise REXML::ParseException.new(message, @source)
        end
        return [:notationdecl, name, *id]
      elsif md = @source.match(/--(.*?)-->/um, true)
        case md[1]
        when /--/, /-\z/
          raise REXML::ParseException.new("Malformed comment", @source)
        end
        return [ :comment, md[1] ] if md
      end
    elsif match = @source.match(/(%.*?;)\s*/um, true)
      return [ :externalentity, match[1] ]
    elsif @source.match?(/\]\s*>/um, true)
      @document_status = :after_doctype
      return [ :end_doctype ]
    end
    # Nothing above matched while still inside the internal subset.
    if @document_status == :in_doctype
      raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
    end
  end
  if @document_status == :after_doctype
    @source.match?(/\s*/um, true)
  end
  # --- Markup and text ---
  begin
    start_position = @source.position
    if @source.match?("<", true)
      # :text's read_until may remain only "<" in buffer. In the
      # case, buffer is empty here. So we need to fill buffer
      # here explicitly.
      @source.ensure_buffer
      if @source.match?("/", true)
        # End tag.
        # NOTE(review): the restore frame is popped and discarded here rather
        # than applied through pop_namespaces_restore — confirm that bindings
        # declared on this element are meant to stay in @namespaces.
        @namespaces_restore_stack.pop
        last_tag = @tags.pop
        md = @source.match(Private::CLOSE_PATTERN, true)
        if md and !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message += " (got '#{md[1]}')" if md
          @source.position = start_position if md.nil?
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.match?("!", true)
        # Peek (without consuming) to decide between comment and CDATA.
        md = @source.match(/([^>]*>)/um)
        #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][0] == ?-
          md = @source.match(/--(.*?)-->/um, true)

          if md.nil? || /--|-\z/.match?(md[1])
            raise REXML::ParseException.new("Malformed comment", @source)
          end

          return [ :comment, md[1] ]
        else
          md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.match?("?", true)
        return process_instruction
      else
        # Get the next tag
        md = @source.match(Private::TAG_PATTERN, true)
        unless md
          @source.position = start_position
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        tag = md[1]
        @document_status = :in_element
        # Collect the prefixes used by this tag and its attributes.
        @prefixes.clear
        @prefixes << md[2] if md[2]
        push_namespaces_restore
        attributes, closed = parse_attributes(@prefixes)
        # Verify that all of the prefixes have been defined
        for prefix in @prefixes
          unless @namespaces.key?(prefix)
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          # Self-closing: remember the tag so the next call emits
          # :end_element, and undo this element's namespace bindings.
          @closed = tag
          pop_namespaces_restore
        else
          if @tags.empty? and @have_root
            raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
          end
          @tags.push( tag )
        end
        @have_root = true
        return [ :start_element, tag, attributes ]
      end
    else
      # Text up to the next "<"; the "<" itself is handed back to the source.
      text = @source.read_until("<")
      if text.chomp!("<")
        @source.position -= "<".bytesize
      end
      if @tags.empty?
        # Outside any element only whitespace is allowed.
        unless /\A\s*\z/.match?(text)
          if @have_root
            raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
          else
            raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
          end
        end
        # Whitespace after the root element is skipped, not reported.
        return pull_event if @have_root
      end
      return [ :text, text ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Wrap anything unexpected so callers only see ParseException.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, (error ? error : $!) )
  end
  # Fallback; every branch above appears to return or raise.
  return [ :dummy ]
end
push_namespaces_restore() 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 625
# Opens a new, empty restore frame on @namespaces_restore_stack and returns
# it; #add_namespace records overwritten bindings in the top frame.
def push_namespaces_restore
  {}.tap { |frame| @namespaces_restore_stack.push(frame) }
end
record_entity_expansion(delta=1) 点击以切换源代码
# File rexml-3.4.0/lib/rexml/parsers/baseparser.rb, line 642
# Adds +delta+ to the running entity expansion count.
#
# Raises RuntimeError once the count exceeds @entity_expansion_limit;
# otherwise returns nil.
def record_entity_expansion(delta=1)
  @entity_expansion_count += delta
  return unless @entity_expansion_count > @entity_expansion_limit

  raise "number of entity expansions exceeded, processing aborted."
end