DecodingMode,
htmlDecodeTree
} from 'entities/lib/decode.js'
+import { Position } from '../ast'
const enum CharCodes {
Tab = 0x9, // "\t"
}
export interface Callbacks {
+ ontext(start: number, endIndex: number): void
+ ontextentity(codepoint: number, endIndex: number): void
+
+ onopentagname(start: number, endIndex: number): void
+ onopentagend(endIndex: number): void
+ onselfclosingtag(endIndex: number): void
+ onclosetag(start: number, endIndex: number): void
+
onattribdata(start: number, endIndex: number): void
onattribentity(codepoint: number): void
onattribend(quote: QuoteType, endIndex: number): void
onattribname(start: number, endIndex: number): void
- oncdata(start: number, endIndex: number, endOffset: number): void
- onclosetag(start: number, endIndex: number): void
+
oncomment(start: number, endIndex: number, endOffset: number): void
- ondeclaration(start: number, endIndex: number): void
+ oncdata(start: number, endIndex: number, endOffset: number): void
+
+ // onprocessinginstruction(start: number, endIndex: number): void
+ // ondeclaration(start: number, endIndex: number): void
onend(): void
- onopentagend(endIndex: number): void
- onopentagname(start: number, endIndex: number): void
- onprocessinginstruction(start: number, endIndex: number): void
- onselfclosingtag(endIndex: number): void
- ontext(start: number, endIndex: number): void
- ontextentity(codepoint: number, endIndex: number): void
}
/**
private baseState = State.Text
/** For special parsing behavior inside of script and style tags. */
private isSpecial = false
+ /** Reocrd newline positions for fast line / column calculation */
+ private newlines: number[] = []
private readonly decodeEntities: boolean
private readonly entityDecoder: EntityDecoder
- public line = 1
- public column = 1
- public startLine = 1
- public startColumn = 1
-
constructor(
{ decodeEntities = true }: { decodeEntities?: boolean },
private readonly cbs: Callbacks
public reset(): void {
this.state = State.Text
this.buffer = ''
- this.recordStart(0)
+ this.sectionStart = 0
this.index = 0
- this.line = 1
- this.column = 1
- this.startLine = 1
- this.startColumn = 1
this.baseState = State.Text
this.currentSequence = undefined!
+ this.newlines.length = 0
}
- private recordStart(start = this.index) {
- this.sectionStart = start
- this.startLine = this.line
- this.startColumn = this.column + (start - this.index)
+ /**
+ * Generate Position object with line / column information using recorded
+ * newline positions. We know the index is always going to be an already
+ * processed index, so all the newlines up to this index should have been
+ * recorded.
+ */
+ public getPositionForIndex(index: number): Position {
+ let line = 1
+ let column = index + 1
+ for (let i = this.newlines.length - 1; i >= 0; i--) {
+ const newlineIndex = this.newlines[i]
+ if (index > newlineIndex) {
+ line = i + 2
+ column = index - newlineIndex
+ break
+ }
+ }
+ return {
+ offset: index,
+ line,
+ column
+ }
}
private stateText(c: number): void {
this.cbs.ontext(this.sectionStart, this.index)
}
this.state = State.BeforeTagName
- this.recordStart()
+ this.sectionStart = this.index
} else if (this.decodeEntities && c === CharCodes.Amp) {
this.startEntity()
}
}
this.isSpecial = false
- this.recordStart(endOfText + 2) // Skip over the `</`
+ this.sectionStart = endOfText + 2 // Skip over the `</`
this.stateInClosingTagName(c)
return // We are done; skip the rest of the function.
}
this.state = State.InCommentLike
this.currentSequence = Sequences.CdataEnd
this.sequenceIndex = 0
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
}
} else {
this.sequenceIndex = 0
}
this.sequenceIndex = 0
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
this.state = State.Text
}
} else if (this.sequenceIndex === 0) {
private stateBeforeTagName(c: number): void {
if (c === CharCodes.ExclamationMark) {
this.state = State.BeforeDeclaration
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else if (c === CharCodes.Questionmark) {
this.state = State.InProcessingInstruction
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else if (this.isTagStartChar(c)) {
const lower = c | 0x20
- this.recordStart()
+ this.sectionStart = this.index
if (lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3)
} else {
private stateInTagName(c: number): void {
if (isEndOfTagSection(c)) {
this.cbs.onopentagname(this.sectionStart, this.index)
- this.recordStart(-1)
+ this.sectionStart = -1
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
}
this.state = this.isTagStartChar(c)
? State.InClosingTagName
: State.InSpecialComment
- this.recordStart()
+ this.sectionStart = this.index
}
}
private stateInClosingTagName(c: number): void {
if (c === CharCodes.Gt || isWhitespace(c)) {
this.cbs.onclosetag(this.sectionStart, this.index)
- this.recordStart(-1)
+ this.sectionStart = -1
this.state = State.AfterClosingTagName
this.stateAfterClosingTagName(c)
}
// Skip everything until ">"
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.state = State.Text
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
}
}
private stateBeforeAttributeName(c: number): void {
} else {
this.state = State.Text
}
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag
} else if (!isWhitespace(c)) {
this.state = State.InAttributeName
- this.recordStart()
+ this.sectionStart = this.index
}
}
private stateInSelfClosingTag(c: number): void {
if (c === CharCodes.Gt) {
this.cbs.onselfclosingtag(this.index)
this.state = State.Text
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
this.isSpecial = false // Reset special state, in case of self-closing special tags
} else if (!isWhitespace(c)) {
this.state = State.BeforeAttributeName
private stateInAttributeName(c: number): void {
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.sectionStart, this.index)
- this.recordStart()
+ this.sectionStart = this.index
this.state = State.AfterAttributeName
this.stateAfterAttributeName(c)
}
this.state = State.BeforeAttributeValue
} else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
- this.recordStart(-1)
+ this.sectionStart = -1
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
} else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
this.state = State.InAttributeName
- this.recordStart()
+ this.sectionStart = this.index
}
}
private stateBeforeAttributeValue(c: number): void {
if (c === CharCodes.DoubleQuote) {
this.state = State.InAttributeValueDq
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else if (c === CharCodes.SingleQuote) {
this.state = State.InAttributeValueSq
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else if (!isWhitespace(c)) {
- this.recordStart()
+ this.sectionStart = this.index
this.state = State.InAttributeValueNq
this.stateInAttributeValueNoQuotes(c) // Reconsume token
}
private handleInAttributeValue(c: number, quote: number) {
if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
this.cbs.onattribdata(this.sectionStart, this.index)
- this.recordStart(-1)
+ this.sectionStart = -1
this.cbs.onattribend(
quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
this.index + 1
private stateInAttributeValueNoQuotes(c: number): void {
if (isWhitespace(c) || c === CharCodes.Gt) {
this.cbs.onattribdata(this.sectionStart, this.index)
- this.recordStart(-1)
+ this.sectionStart = -1
this.cbs.onattribend(QuoteType.Unquoted, this.index)
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
}
private stateInDeclaration(c: number): void {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
- this.cbs.ondeclaration(this.sectionStart, this.index)
+ // this.cbs.ondeclaration(this.sectionStart, this.index)
this.state = State.Text
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
}
}
private stateInProcessingInstruction(c: number): void {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
- this.cbs.onprocessinginstruction(this.sectionStart, this.index)
+ // this.cbs.onprocessinginstruction(this.sectionStart, this.index)
this.state = State.Text
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
}
}
private stateBeforeComment(c: number): void {
this.currentSequence = Sequences.CommentEnd
// Allow short comments (eg. <!-->)
this.sequenceIndex = 2
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
} else {
this.state = State.InDeclaration
}
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.oncomment(this.sectionStart, this.index, 0)
this.state = State.Text
- this.recordStart(this.index + 1)
+ this.sectionStart = this.index + 1
}
}
private stateBeforeSpecialS(c: number): void {
break
}
}
- this.index++
- // line / column handling
if (c === CharCodes.NewLine) {
- this.line++
- this.column = 1
- } else {
- this.column++
+ this.newlines.push(this.index)
}
+ this.index++
}
this.cleanup()
this.finish()
(this.state === State.InSpecialTag && this.sequenceIndex === 0)
) {
this.cbs.ontext(this.sectionStart, this.index)
- this.recordStart()
+ this.sectionStart = this.index
} else if (
this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueNq
) {
this.cbs.onattribdata(this.sectionStart, this.index)
- this.recordStart()
+ this.sectionStart = this.index
}
}
}
if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart)
}
- this.recordStart(this.entityStart + consumed)
+ this.sectionStart = this.entityStart + consumed
this.index = this.sectionStart - 1
this.cbs.onattribentity(cp)
if (this.sectionStart < this.entityStart) {
this.cbs.ontext(this.sectionStart, this.entityStart)
}
- this.recordStart(this.entityStart + consumed)
+ this.sectionStart = this.entityStart + consumed
this.index = this.sectionStart - 1
this.cbs.ontextentity(cp, this.sectionStart)