class REXML::Text

表示 XML 文档中的文本节点

常量

EREFERENCE
NEEDS_A_SECOND_CHECK
NUMERICENTITY
REFERENCE
SETUTITSBUS
SLAICEPS: 在写入字符串中被替换的字符
SPECIALS: 替换发生的顺序
SUBSTITUTES
VALID_CHAR
VALID_XML_CHARS

属性

raw[RW]

如果 raw 为 true，则 REXML 将保持值不变

公共类方法

check(string, pattern, doctype) 点击切换源码

检查非法字符

# File rexml-3.4.0/lib/rexml/text.rb, line 116
# Validates +string+ as raw (already escaped) XML text and returns it
# unchanged when no problem is found.
#
# string::  the text to validate
# pattern:: not referenced anywhere in this method body
# doctype:: not referenced anywhere in this method body
#
# Raises RuntimeError ("Illegal character ... in raw string ...") when the
# text contains a character outside VALID_CHAR, a bare "<", or an "&" that
# does not start a well-formed character or entity reference.
def Text.check string, pattern, doctype

  # Characters that are illegal anywhere. The per-character scan only runs
  # when the whole-string match against VALID_XML_CHARS fails, and it raises
  # on the first character whose code point is matched by no VALID_CHAR entry.
  if !string.match?(VALID_XML_CHARS)
    string.chars.each do |c|
      case c.ord
      when *VALID_CHAR
      else
        raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
      end
    end
  end

  # Walk through every "<" and "&" in the string, left to right.
  pos = 0
  while (index = string.index(/<|&/, pos))
    # A literal "<" is never allowed in raw text.
    if string[index] == "<"
      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
    end

    # From here on string[index] is "&". Look for the terminating ";" that is
    # directly preceded by a non-whitespace character; end_index is the index
    # of that preceding character, not of the ";" itself.
    unless (end_index = string.index(/[^\s];/, index + 1))
      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
    end

    # The reference body: everything between "&" and ";" (both excluded).
    value = string[(index + 1)..end_index]
    if /\s/.match?(value)
      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
    end

    if value[0] == "#"
      # Numeric character reference: "#" followed by decimal digits, or by
      # "x" and hexadecimal digits.
      character_reference = value[1..-1]

      unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
        # The two branches differ only in what the message reports as the
        # illegal character: the "&" itself or the whole string.
        if character_reference[0] == "x" || character_reference[-1] == "x"
          raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
        else
          raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
        end
      end

      # The referenced code point itself must also be matched by VALID_CHAR.
      case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
      when *VALID_CHAR
      else
        raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
      end
    elsif !(/\A#{Entity::NAME}\z/um.match?(value))
      # Otherwise the body must be, in full, a name matching Entity::NAME.
      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
    end

    # Resume scanning at the ";" that closed this reference.
    pos = end_index + 1
  end

  string
end

new(arg, respect_whitespace=false, parent=nil, raw=nil, entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK ) 点击切换源码

构造函数。arg：如果是一个字符串，则内容被设置为该字符串；如果是一个 Text，则该对象被浅克隆。

respect_whitespace (boolean, false) 如果为 true，则空格被保留

parent (nil) 如果这是一个 Parent 对象，则父节点将被设置为此对象。

raw (nil) 此参数可以取三个值。如果为 true，则用于构造此对象的值应不包含未转义的 XML 标记，并且 REXML 不会更改文本。如果为 false，则字符串可以包含任何字符，并且 REXML 将转义其值出现在文本中的所有已定义实体。如果为 nil（默认值），则父节点的 raw 值将用作此节点的 raw 值。如果父节点没有 raw 值，并且没有提供值，则默认值为 false。如果您为某些文本定义了实体，并且不希望 REXML 在输出中转义该文本，请使用此参数。

Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
Text.new( "<&", false, nil, true )  #-> Parse exception
Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
# Assume that the entity "s" is defined to be "sean"
# and that the entity    "r" is defined to be "russell"
Text.new( "sean russell" )          #-> "&s; &r;"
Text.new( "sean russell", false, nil, true ) #-> "sean russell"

entity_filter (nil) 这可以是一个要在提供的文本中匹配的实体数组。仅当 raw 设置为 false 时，此参数才有用。

Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"

在最后一个示例中，entity_filter 参数被忽略。

illegal 仅供内部使用

调用超类方法 REXML::Child::new

# File rexml-3.4.0/lib/rexml/text.rb, line 79
# Builds a text node from a String or from another Text.
#
# arg::                a String (a copy is stored) or a Text (its string is
#                      copied; its raw flag and entity filter are taken over)
# respect_whitespace:: when falsy, each run of a repeated space, newline or
#                      tab is collapsed to a single character
# parent::             when truthy, handed to the superclass constructor and
#                      its +raw+ value becomes this node's starting raw flag
# raw::                when not nil, overrides the raw flag from parent/arg
# entity_filter::      when truthy, overrides the entity filter taken from arg
# illegal::            forwarded to Text.check when the node ends up raw
#
# Raises RuntimeError when +arg+ is neither a String nor a Text.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )

  @raw = false
  @parent = nil
  @entity_filter = nil

  if parent
    super( parent )
    @raw = parent.raw
  end

  if arg.kind_of? String
    @string = arg.dup
  elsif arg.kind_of? Text
    @string = arg.instance_variable_get(:@string).dup
    @raw = arg.raw
    @entity_filter = arg.instance_variable_get(:@entity_filter)
  else
    # Object#type no longer exists (removed in Ruby 1.9); calling it here
    # raised NoMethodError and hid this message. Object#class is the
    # replacement.
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  @string.squeeze!(" \n\t") unless respect_whitespace
  # Normalize CRLF and lone CR line endings to LF.
  @string.gsub!(/\r\n?/, "\n")
  # Explicit arguments win over anything inherited from parent or arg.
  @raw = raw unless raw.nil?
  @entity_filter = entity_filter if entity_filter
  clear_cache

  # Raw text is emitted verbatim, so it must already be valid XML text.
  Text.check(@string, illegal, doctype) if @raw
end

私有类方法

expand(ref, doctype, filter) 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 404
# Expands a single reference such as "&#65;", "&#x41;", "&amp;" or "&name;".
#
# ref::     the complete reference text, including the leading "&" and the
#           trailing ";"
# doctype:: when truthy, asked for the entity via doctype.entity(name)
# filter::  when truthy and it includes the entity name, the reference is
#           returned untouched
#
# Returns the replacement text, or +ref+ itself when nothing applies.
def Text.expand(ref, doctype, filter)
  # Numeric character references: hexadecimal ("&#x..;") or decimal ("&#..;").
  if ref[1] == "#"
    digits, radix = ref[2] == "x" ? [ref[3...-1], 16] : [ref[2...-1], 10]
    return [digits.to_i(radix)].pack('U*')
  end

  return '&' if ref == '&amp;'

  # Strip the surrounding "&" and ";" to get the entity name.
  name = ref[1...-1]
  return ref if filter && filter.include?(name)
  return doctype.entity(name) || ref if doctype

  default_entity = DocType::DEFAULT_ENTITIES[name]
  default_entity ? default_entity.value : ref
end

normalize( input, doctype=nil, entity_filter=nil ) 点击切换源码

转义所有可能的实体

# File rexml-3.4.0/lib/rexml/text.rb, line 366
# Escapes +input+ so it can be written out as XML text.
#
# input::         converted with to_s before processing
# doctype::       when truthy, its entities drive the substitution;
#                 otherwise DocType::DEFAULT_ENTITIES is used
# entity_filter:: when truthy, entities whose name it includes are skipped
#                 (only consulted on the doctype path)
#
# Returns the escaped string.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  escaped = input.to_s
  # Every ampersand is escaped first, in a single pass.
  escaped = escaped.gsub("&", "&amp;") if escaped.include?("&")

  if doctype
    # Swap each occurrence of an entity's value for a reference to it.
    doctype.entities.each_value do |entity|
      next unless entity.value
      next if entity_filter && entity_filter.include?(entity.name)

      escaped = escaped.gsub(entity.value, "&#{entity.name};")
    end
  else
    # Same substitution, driven by the built-in default entities.
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      next unless escaped.include?(entity.value)

      escaped = escaped.gsub(entity.value, "&#{entity.name};")
    end
  end

  escaped
end

read_with_substitution( input, illegal=nil ) 点击切换源码

读取文本，替换实体

# File rexml-3.4.0/lib/rexml/text.rb, line 340
# Reads text, replacing entity and numeric character references with the
# characters they stand for.
#
# input::   the text to read; it is cloned, the argument is not modified
# illegal:: optional pattern; when given and it matches, the text is rejected
#
# Returns the substituted copy.
# Raises ParseException when +illegal+ matches the text.
def Text::read_with_substitution( input, illegal=nil )
  copy = input.clone

  if illegal && copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  # Normalize CRLF and lone CR line endings to LF.
  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    # Unrolled on purpose: one substitution per SETUTITSBUS/SLAICEPS pair.
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # Numeric character references. Hexadecimal digits may be upper or lower
    # case, so "&#x4A;" is expanded just like "&#x4a;". Leading zeros are
    # consumed by the pattern so Integer() never sees an octal-looking value.
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
      m=$1
      # Turn "x41" into "0x41" so Integer() parses it as hexadecimal.
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
  end
  copy
end

unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil ) 点击切换源码

取消转义所有可能的实体

# File rexml-3.4.0/lib/rexml/text.rb, line 390
# Unescapes every reference in +string+ that matches REFERENCE.
#
# string::  the escaped text; CRLF and lone CR are converted to LF first
# doctype:: forwarded to Text.expand
# filter::  forwarded to Text.expand
# illegal:: not referenced in this method body
# entity_expansion_text_limit:: upper bound on the total byte size of all
#           expansions; falls back to Security.entity_expansion_text_limit
#
# Returns the unescaped string.
# Raises RuntimeError when the expansions exceed the limit.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
  entity_expansion_text_limit ||= Security.entity_expansion_text_limit
  expanded_bytes = 0
  unix_newlines = string.gsub(/\r\n?/, "\n")

  unix_newlines.gsub(REFERENCE) do |reference|
    replacement = Text.expand(reference, doctype, filter)
    # Keep a running total so a flood of expansions is stopped early.
    expanded_bytes += replacement.bytesize
    raise "entity expansion has grown too large" if expanded_bytes > entity_expansion_text_limit

    replacement
  end
end

公共实例方法

<<( to_append ) 点击切换源码

将文本附加到此文本节点。文本以该文本节点的 raw 模式附加。

返回文本本身以启用方法链，如 text << "XXX" << "YYY"。

# File rexml-3.4.0/lib/rexml/text.rb, line 189
# Appends +to_append+ to this node's text after converting CRLF and lone CR
# line endings to LF, then drops the cached normalized/unnormalized forms.
#
# Returns self so that calls can be chained.
def <<( to_append )
  appended = to_append.gsub(/\r\n?/, "\n")
  @string << appended
  clear_cache
  self
end

<=>( other ) 点击切换源码

other：一个字符串或一个 Text。返回 (to_s <=> other.to_s) 的结果。

# File rexml-3.4.0/lib/rexml/text.rb, line 198
# Compares this node with +other+ through their to_s representations.
#
# Returns whatever String#<=> yields for the two strings.
def <=>( other )
  own_text = to_s
  own_text <=> other.to_s
end

clone() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 179
# Returns a new Text built from this one, with respect_whitespace set to
# true so the copy's whitespace is left as it is.
def clone
  Text.new(self, true)
end

doctype() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 202
# Returns the doctype of the document this node's parent belongs to, or nil
# when there is no parent or the parent has no document.
def doctype
  return nil unless @parent

  owner = @parent.document
  owner ? owner.doctype : nil
end

empty?() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 174
# Returns true when the stored string has a size of zero.
def empty?
  @string.size.zero?
end

indent_text(string, level=1, style="\t", indentfirstline=true) 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 274
# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string::          the text to indent
# level::           how many times +style+ is repeated; a negative level
#                   returns +string+ untouched
# style::           the indentation unit
# indentfirstline:: when falsy, the finished result is stripped of leading
#                   and trailing whitespace
#
# Trailing whitespace (the line terminator included) is removed from each
# indented line before the lines are concatenated.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0

  indented = string.each_line.each_with_object('') do |line, buffer|
    prefix = style * level
    buffer << (prefix + line).sub(/[\s]+$/,'')
  end
  indented.strip! unless indentfirstline
  indented
end

inspect() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 228
def inspect
  @string.inspect
end

node_type() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 170
# Identifies this node's kind; always the symbol :text.
def node_type
  :text
end

parent=(parent) 点击切换源码

调用超类方法 REXML::Child#parent=

# File rexml-3.4.0/lib/rexml/text.rb, line 110
# Assigns the parent through the superclass, then re-validates the stored
# string with Text.check when this node is raw and a parent is now set.
def parent= parent
  super(parent)
  return unless @raw && @parent

  Text.check(@string, NEEDS_A_SECOND_CHECK, doctype)
end

to_s() 点击切换源码

返回此文本节点的字符串值。此字符串始终被转义，这意味着它是一个有效的 XML 文本节点字符串，并且所有可以转义的实体都已插入。此方法遵循构造函数中设置的实体过滤器。

# Assume that the entity "s" is defined to be "sean", and that the
# entity "r" is defined to be "russell"
t = Text.new( "< & sean russell", false, nil, false, ['s'] )
t.to_s   #-> "&lt; &amp; &s; russell"
t = Text.new( "< & &s; russell", false, nil, false )
t.to_s   #-> "&lt; &amp; &s; russell"
u = Text.new( "sean russell", false, nil, true )
u.to_s   #-> "sean russell"

# File rexml-3.4.0/lib/rexml/text.rb, line 223
def to_s
  return @string if @raw
  @normalized ||= Text::normalize( @string, doctype, @entity_filter )
end

value() 点击切换源码

返回此文本的字符串值。这是没有实体的文本，因为它可能在程序中使用，或打印到控制台。这将忽略“raw”属性设置和任何 entity_filter。

# Assume that the entity "s" is defined to be "sean", and that the
# entity "r" is defined to be "russell"
t = Text.new( "< & sean russell", false, nil, false, ['s'] )
t.value   #-> "< & sean russell"
t = Text.new( "< & &s; russell", false, nil, false )
t.value   #-> "< & sean russell"
u = Text.new( "sean russell", false, nil, true )
u.value   #-> "sean russell"

# File rexml-3.4.0/lib/rexml/text.rb, line 245
# Returns the unescaped text of this node. The result of Text.unnormalize is
# computed once and cached in @unnormalized; the expansion limit comes from
# the owning document when there is one.
def value
  return @unnormalized if @unnormalized

  @unnormalized = Text.unnormalize(
    @string,
    doctype,
    entity_expansion_text_limit: document&.entity_expansion_text_limit
  )
end

value=( val ) 点击切换源码

设置此文本节点的内容。这需要文本为未规范化的。它返回 self。

e = Element.new( "a" )
e.add_text( "foo" )   # <a>foo</a>
e[0].value = "bar"    # <a>bar</a>
e[0].value = "<a>"    # <a>&lt;a&gt;</a>

# File rexml-3.4.0/lib/rexml/text.rb, line 257
# Replaces this node's text with +val+ after converting CRLF and lone CR
# line endings to LF, drops the cached forms, and switches the node out of
# raw mode.
def value=( val )
  unix_newlines = val.gsub(/\r\n?/, "\n")
  @string = unix_newlines
  clear_cache
  @raw = false
end

wrap(string, width, addnewline=false) 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 263
# Recursively wraps +string+ so that lines break at spaces, aiming for at
# most +width+ characters per line.
#
# string::     the text to wrap
# width::      the target line width
# addnewline:: when true, a newline is also placed in front of the first
#              wrapped line (the recursive calls do not repeat it)
#
# A word longer than +width+ cannot be broken: the line is then cut at the
# first space after the word, or left whole when there is no further space.
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  # Prefer the last space at or before the cutoff. rindex returns nil when
  # there is none, which used to blow up in string[0, nil]; fall back to the
  # next space after the cutoff instead.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place

  head = string[0, place]
  tail = wrap(string[place + 1..-1], width)
  addnewline ? "\n" + head + "\n" + tail : head + "\n" + tail
end

write( writer, indent=-1, transitive=false, ie_hack=false ) 点击切换源码

已弃用

请参阅 REXML::Formatters

# File rexml-3.4.0/lib/rexml/text.rb, line 289
# Deprecated: emits a deprecation warning, then writes this node to +writer+
# through a formatter.
#
# writer::     the output target handed to the formatter
# indent::     above -1 selects REXML::Formatters::Pretty with that indent;
#              otherwise REXML::Formatters::Default is used
# transitive:: not referenced in this method body
# ie_hack::    not referenced in this method body
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters", uplevel: 1)
  formatter = indent > -1 ? REXML::Formatters::Pretty.new( indent ) : REXML::Formatters::Default.new
  formatter.write( self, writer )
end

write_with_substitution(out, input) 点击切换源码

写出文本，预先替换特殊字符。out：一个字符串、IO 或任何其他支持 <<( String ) 的对象。input：要替换和写出的文本。

z=utf8.unpack("U*")
ascOut=""
z.each{|r|
  if r <  0x100
    ascOut.concat(r.chr)
  else
    ascOut.concat(sprintf("&#x%x;", r))
  end
}
puts ascOut

# File rexml-3.4.0/lib/rexml/text.rb, line 321
# Writes +input+ to +out+ after replacing special characters.
#
# out::   any object that accepts << with a String
# input:: the text to escape; it is cloned, the argument is not modified
#
# Returns the result of out << escaped_text.
def write_with_substitution out, input
  escaped = input.clone
  # Kept unrolled rather than looped, for speed; the pairs are applied in
  # index order, SPECIALS[i] being replaced by SUBSTITUTES[i].
  escaped.gsub!(SPECIALS[0], SUBSTITUTES[0])
  escaped.gsub!(SPECIALS[1], SUBSTITUTES[1])
  escaped.gsub!(SPECIALS[2], SUBSTITUTES[2])
  escaped.gsub!(SPECIALS[3], SUBSTITUTES[3])
  escaped.gsub!(SPECIALS[4], SUBSTITUTES[4])
  escaped.gsub!(SPECIALS[5], SUBSTITUTES[5])
  out << escaped
end

xpath() 点击切换源码

注意：这可能无法正常工作

# File rexml-3.4.0/lib/rexml/text.rb, line 301
# Returns the parent's xpath with "/text()" appended.
# NOTE(review): there is no guard for a missing parent here.
def xpath
  @parent.xpath + "/text()"
end

私有实例方法

clear_cache() 点击切换源码

# File rexml-3.4.0/lib/rexml/text.rb, line 334
# Drops both cached string forms so they are recomputed on next use.
def clear_cache
  @normalized = @unnormalized = nil
end