From b1fd528db5305d85c6dfabd8ff7d0656c7f97a39 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Wed, 19 Oct 2011 08:03:30 +1100 Subject: [PATCH] html: parse raw text and RCDATA elements, such as <title> and <script>

#document | | | + // rawTag is the "script" in "</script>" that closes the next token. If + // non-empty, the subsequent call to Next will return a raw or RCDATA text + // token: one that treats "<p>

" as text instead of an element. + // rawTag's contents are lower-cased. + rawTag string + // textIsRaw is whether the current text token's data is not escaped. + textIsRaw bool } // Error returns the error associated with the most recent ErrorToken token. @@ -225,6 +233,54 @@ func (z *Tokenizer) skipWhiteSpace() { } } +// readRawOrRCDATA reads until the next "", where "foo" is z.rawTag and +// is typically something like "script" or "textarea". +func (z *Tokenizer) readRawOrRCDATA() { +loop: + for { + c := z.readByte() + if z.err != nil { + break loop + } + if c != '<' { + continue loop + } + c = z.readByte() + if z.err != nil { + break loop + } + if c != '/' { + continue loop + } + for i := 0; i < len(z.rawTag); i++ { + c = z.readByte() + if z.err != nil { + break loop + } + if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') { + continue loop + } + } + c = z.readByte() + if z.err != nil { + break loop + } + switch c { + case ' ', '\n', '\r', '\t', '\f', '/', '>': + // The 3 is 2 for the leading "". + z.raw.end-- + } + } + z.data.end = z.raw.end + // A textarea's or title's RCDATA can contain escaped entities. + z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title" + z.rawTag = "" +} + // readComment reads the next comment token starting with "