class CSV::Parser::InputsScanner

CSV::InputsScanner 接收 IO 输入、编码和 chunk_size。它还通过 keep_start、keep_end、keep_back、keep_drop 方法控制对象的生命周期。

CSV::InputsScanner.scan() 尝试在当前位置与模式匹配。如果匹配成功,扫描器会前进“扫描指针”并返回匹配的字符串。否则,扫描器返回 nil。

CSV::InputsScanner.rest() 返回字符串的“剩余”部分(即扫描指针之后的所有内容)。如果没有更多数据(eos? = true),则返回空字符串 ""。

公共类方法

new(inputs, encoding, row_separator, chunk_size: 8192) 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 87
# Sets up the scanner state and loads the first chunk.
#
# inputs        - list of input objects; it is duplicated, so the caller's
#                 list is left untouched when consumed inputs are shifted off.
# encoding      - encoding used for the empty string that read_chunk scans
#                 once an input yields no more data.
# row_separator - separator handed to the input's +gets+ by read_chunk.
# chunk_size    - size limit handed to the input's +gets+ by read_chunk.
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  @encoding = encoding
  @row_separator = row_separator
  @chunk_size = chunk_size

  # Private copy: read_chunk removes inputs from the front as they run dry.
  @inputs = inputs.dup
  # No inputs at all means there is no further scanner to load.
  @last_scanner = @inputs.empty?
  # Stack of [scanner, start position, buffer] entries pushed by keep_start.
  @keeps = []

  read_chunk
end

公共实例方法

check(pattern) 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 257
# Delegates to the current StringScanner's +check+ with +pattern+ and
# returns its result.
#
# Only the current scanner is consulted; no further chunk is loaded here.
def check(pattern)
  @scanner.check(pattern)
end
each_line(row_separator) { |buffer| ... } 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 97
# Yields the remaining input piece by piece, where each piece ends with
# +row_separator+. A trailing piece that never received its separator is
# yielded as-is once read_chunk reports that nothing more can be loaded.
#
# Returns an Enumerator over the same pieces when no block is given.
#
# The scanner position is moved past each complete line before that line
# is yielded.
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?
  # Incomplete line carried over until its row separator shows up.
  buffer = nil
  input = @scanner.rest
  position = @scanner.pos
  # Correction added to position for carried-over bytes; it is set to the
  # negated size of the carried buffer after each read_chunk below.
  offset = 0
  n_row_separator_chars = row_separator.size
  # trace(__method__, :start, input)
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        # A two-character separator can be split in two: its first
        # character ends the carried buffer and its second character
        # starts this piece.
        if n_row_separator_chars == 2 and
          buffer.end_with?(row_separator[0]) and
          line.start_with?(row_separator[1])
          # Move the second separator character onto the carried line,
          # which completes it.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          # Nothing left in this piece after giving up its first character.
          next if line.empty?
        else
          # Otherwise glue the carried data in front of this piece and
          # treat the result as one line.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # No separator yet: hold the piece back for the next iteration.
        buffer = line
      end
    end
    # Load more data; stop when read_chunk returns a falsy value.
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  # Flush whatever is left without a trailing separator.
  yield(buffer) if buffer
end
eos?() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 170
# Returns the result of +eos?+ on the current StringScanner.
#
# Only the current scanner is asked; this method does not itself try to
# load another chunk.
def eos?
  @scanner.eos?
end
keep_back() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 197
# Abandons the most recent keep and rewinds scanning to where it started.
#
# The popped keep is handled in one of two ways:
#
# * It carries a buffer: a new StringScanner over that buffer becomes the
#   current scanner. Before that, data from the current scanner's string is
#   put back at the front of @inputs (wrapped in a StringIO), and
#   @last_scanner is cleared so the data is read again later.
# * It carries no buffer: the keep must belong to the current scanner, and
#   the scanner position is simply reset to the keep's start.
#
# Afterwards the next chunk is loaded if the current scanner is at its end.
#
# Raises UnexpectedError when the keep has no buffer but was recorded for a
# different scanner than the current one.
def keep_back
  # trace(__method__, :start)
  scanner, start, buffer = @keeps.pop
  if buffer
    # trace(__method__, :rescan, start, buffer)
    string = @scanner.string
    if scanner == @scanner
      # NOTE(review): the slice starts at +start+ and its length is computed
      # as bytesize - pos - start; confirm this is the intended span.
      keep = string.byteslice(start,
                              string.bytesize - @scanner.pos - start)
    else
      # The keep was started on another scanner: put back the whole
      # current string.
      keep = string
    end
    if keep and not keep.empty?
      @inputs.unshift(StringIO.new(keep))
      @last_scanner = false
    end
    @scanner = StringScanner.new(buffer)
  else
    if @scanner != scanner
      message = "scanners are different but no buffer: "
      message += "#{@scanner.inspect}(#{@scanner.object_id}): "
      message += "#{scanner.inspect}(#{scanner.object_id})"
      raise UnexpectedError, message
    end
    # trace(__method__, :repos, start, buffer)
    @scanner.pos = start
    last_scanner, last_start, last_buffer = @keeps.last
    # Drop the last buffer when the last buffer is the same data
    # in the last keep. If we keep it, we have duplicated data
    # by the next keep_back.
    if last_scanner == @scanner and
      last_buffer and
      last_buffer == last_scanner.string.byteslice(last_start, start)
      @keeps.last[2] = nil
    end
  end
  read_chunk if @scanner.eos?
end
keep_drop() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 236
# Discards the most recent keep without rewinding.
#
# If the discarded keep had accumulated a buffer and another keep is still
# open beneath it, that buffer is handed to the remaining keep: appended to
# its buffer when it has one, stored as its buffer otherwise.
def keep_drop
  dropped = @keeps.pop
  pending = dropped && dropped[2]
  # Nothing was buffered for the dropped keep (or there was no keep at all).
  return unless pending

  outer = @keeps.last
  # No enclosing keep that could take over the buffered data.
  return unless outer

  outer_buffer = outer[2]
  if outer_buffer
    outer_buffer << pending
  else
    outer[2] = pending
  end
end
keep_end() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 181
# Closes the most recent keep and returns the data it covers.
#
# The covered data is the slice of the current scanner's string from the
# keep's start (or from 0 when the keep was started on a different scanner)
# up to the current position. When the keep had accumulated a buffer, the
# slice is appended to that buffer and the buffer is returned instead.
def keep_end
  kept_scanner, kept_start, prefix = @keeps.pop
  # Data from an earlier scanner lives in the buffer, so the slice starts at
  # the beginning of the current string in that case.
  from = kept_scanner == @scanner ? kept_start : 0
  tail = @scanner.string.byteslice(from, @scanner.pos - from)
  return tail unless prefix

  prefix << tail
end
keep_start() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 174
# Opens a new keep at the current scanner position.
#
# adjust_last_keep runs first, so an already open keep is brought up to
# date before the new entry is pushed. The pushed entry has the form
# [scanner, start position, buffer], with the buffer initially nil.
def keep_start
  # trace(__method__, :start)
  adjust_last_keep
  @keeps.push([@scanner, @scanner.pos, nil])
  # trace(__method__, :done)
end
rest() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 253
# Returns the result of +rest+ on the current StringScanner, i.e. the part
# of the current scanner's string after the scan pointer.
#
# Data in inputs that have not been loaded yet is not included.
def rest
  @scanner.rest
end
scan(pattern) 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 143
# Scans +pattern+ at the current position of the current scanner and
# returns the matched string, or nil when there is no match.
#
# When more input may follow (the current scanner is not the last one) and
# a successful match consumed the scanner up to its end, the next chunk is
# loaded so that scanning can continue from there.
def scan(pattern)
  matched = @scanner.scan(pattern)
  unless @last_scanner
    read_chunk if matched && @scanner.eos?
  end
  matched
end
scan_all(pattern) 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 154
# Scans +pattern+ like #scan, but keeps matching across chunk boundaries.
#
# Returns the matched string, or nil when the first attempt does not match.
# When more input may follow and a match ran up to the end of the current
# scanner, the next chunk is loaded and +pattern+ is tried again there; each
# further match is appended to the result. This stops as soon as the scanner
# is not at its end, no chunk can be loaded, or the pattern fails to match.
def scan_all(pattern)
  matched = @scanner.scan(pattern)
  # Either nothing more can be loaded, or there is nothing to extend.
  return matched if @last_scanner || matched.nil?

  while @scanner.eos? && read_chunk
    piece = @scanner.scan(pattern)
    break unless piece

    matched << piece
  end
  matched
end

私有实例方法

adjust_last_keep() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 266
# Copies the data covered so far by the most recent keep into that keep's
# buffer.
#
# The covered data is the slice of the current scanner's string from the
# keep's start up to the current position. When the keep was started on a
# different scanner the slice begins at 0. The keep's recorded start is
# not modified. Does nothing when no keep is open.
def adjust_last_keep
  last_keep = @keeps.last
  return if last_keep.nil?

  kept_scanner, from, pending = last_keep
  source = @scanner.string
  from = 0 unless @scanner == kept_scanner

  covered =
    if from == 0 && @scanner.eos?
      # The whole string is covered; no slicing needed.
      source
    else
      source.byteslice(from, @scanner.pos - from)
    end
  return unless covered

  if pending
    pending << covered
  else
    # Store a copy so the keep never shares the scanner's own string.
    last_keep[2] = covered.dup
  end
end
read_chunk() 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 294
# Loads the next piece of input into a fresh StringScanner.
#
# Returns false when no further data can be loaded and true otherwise.
# Before switching scanners, adjust_last_keep saves the data covered by an
# open keep.
#
# A StringIO input is read in one go and then removed from @inputs. Any
# other input is read with +gets+, limited by @row_separator and
# @chunk_size; it is removed once it reports +eof?+ or once +gets+ returns
# nothing. In the latter case the scanner is set to an empty string in
# @encoding and the next input, if any, is tried.
#
# Raises InvalidEncoding when the data read is not valid in its encoding.
def read_chunk
  return false if @last_scanner

  adjust_last_keep

  source = @inputs.first
  if StringIO === source
    content = source.read
    raise InvalidEncoding unless content.valid_encoding?
    # trace(__method__, :stringio, content)
    @scanner = StringScanner.new(content)
    @inputs.shift
    @last_scanner = @inputs.empty?
    return true
  end

  chunk = source.gets(@row_separator, @chunk_size)
  unless chunk
    # trace(__method__, :no_chunk)
    # This input is exhausted: move on to the next one, if there is any.
    @scanner = StringScanner.new("".encode(@encoding))
    @inputs.shift
    @last_scanner = @inputs.empty?
    return @last_scanner ? false : read_chunk
  end

  raise InvalidEncoding unless chunk.valid_encoding?
  # trace(__method__, :chunk, chunk)
  @scanner = StringScanner.new(chunk)
  # Inputs that can report their end are dropped eagerly.
  if source.respond_to?(:eof?) && source.eof?
    @inputs.shift
    @last_scanner = @inputs.empty?
  end
  true
end
trace(*args) 点击以切换源代码
# File csv-3.3.2/lib/csv/parser.rb, line 262
# Debugging aid: pretty-prints +args+ followed by the current scanner, its
# string, its position and the keep stack.
#
# Safe navigation is used for the scanner's string and position, so this
# also works while @scanner is still nil. The calls to this method elsewhere
# in this class are commented out.
def trace(*args)
  pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
end