# File lib/html5/tokenizer.rb, line 249
    # Performs one step of the tokenizer's "data state".
    #
    # Reads a single character from @stream and, depending on that character,
    # @content_model_flag and @escapeFlag, does one of the following:
    # * switches @state to :entity_data_state or :tag_open_state,
    # * sets or clears @escapeFlag (queueing the character as :Characters),
    # * queues a :SpaceCharacters or :Characters token on @token_queue.
    #
    # While the content model is :CDATA or :RCDATA, @lastFourChars is kept as a
    # window of the (up to) four most recently consumed characters so that the
    # "<!--" and "-->" sequences that set and clear @escapeFlag can be detected.
    #
    # Returns false when the stream yields :EOF (tokenization ends), true
    # otherwise.
    def data_state
      data = @stream.char
      tracking = [:CDATA, :RCDATA].include?(@content_model_flag)

      if tracking
        @lastFourChars << data
        @lastFourChars.shift if @lastFourChars.length > 4
      end

      if data == "&" && [:PCDATA, :RCDATA].include?(@content_model_flag) && !@escapeFlag
        @state = :entity_data_state
      elsif data == "-" && tracking && !@escapeFlag && @lastFourChars.join('') == "<!--"
        # The window now spells "<!--": start of an escaped text span.
        @escapeFlag = true
        @token_queue << {:type => :Characters, :data => data}
      elsif data == "<" && !@escapeFlag &&
            [:PCDATA, :CDATA, :RCDATA].include?(@content_model_flag)
        @state = :tag_open_state
      elsif data == ">" && @escapeFlag && tracking &&
            @lastFourChars[1..-1].join('') == "-->"
        # The last three characters spell "-->": end of the escaped text span.
        @escapeFlag = false
        @token_queue << {:type => :Characters, :data => data}
      elsif data == :EOF
        # Tokenization ends.
        return false
      elsif SPACE_CHARACTERS.include? data
        # Directly after emitting a token you switch back to the "data
        # state". At that point SPACE_CHARACTERS are important so they are
        # emitted separately.
        # XXX need to check if we don't need a special "spaces" flag on
        # characters.
        #
        # The rest of the space run is not added to @lastFourChars: the first
        # space is already in the window (when tracking) and is enough to
        # break any partial "<!--" / "-->" match.
        @token_queue << {:type => :SpaceCharacters, :data => data + @stream.chars_until(SPACE_CHARACTERS, true)}
      else
        rest = @stream.chars_until(%w[& < > -])
        if tracking
          # Characters consumed in bulk must enter the window as well;
          # otherwise "<!-x-" would be mistaken for "<!--" and "-x->" for
          # "-->". Only the last four can matter, so only those are copied.
          @lastFourChars.concat((rest[-4..-1] || rest).split(''))
          @lastFourChars.shift while @lastFourChars.length > 4
        end
        @token_queue << {:type => :Characters, :data => data + rest}
      end
      true
    end