+++ /dev/null
-import Tokenizer, { Callbacks, QuoteType } from './Tokenizer.js'
-import { fromCodePoint } from 'entities/lib/decode.js'
-
-const formTags = new Set([
- 'input',
- 'option',
- 'optgroup',
- 'select',
- 'button',
- 'datalist',
- 'textarea'
-])
-const pTag = new Set(['p'])
-const tableSectionTags = new Set(['thead', 'tbody'])
-const ddtTags = new Set(['dd', 'dt'])
-const rtpTags = new Set(['rt', 'rp'])
-
-const openImpliesClose = new Map<string, Set<string>>([
- ['tr', new Set(['tr', 'th', 'td'])],
- ['th', new Set(['th'])],
- ['td', new Set(['thead', 'th', 'td'])],
- ['body', new Set(['head', 'link', 'script'])],
- ['li', new Set(['li'])],
- ['p', pTag],
- ['h1', pTag],
- ['h2', pTag],
- ['h3', pTag],
- ['h4', pTag],
- ['h5', pTag],
- ['h6', pTag],
- ['select', formTags],
- ['input', formTags],
- ['output', formTags],
- ['button', formTags],
- ['datalist', formTags],
- ['textarea', formTags],
- ['option', new Set(['option'])],
- ['optgroup', new Set(['optgroup', 'option'])],
- ['dd', ddtTags],
- ['dt', ddtTags],
- ['address', pTag],
- ['article', pTag],
- ['aside', pTag],
- ['blockquote', pTag],
- ['details', pTag],
- ['div', pTag],
- ['dl', pTag],
- ['fieldset', pTag],
- ['figcaption', pTag],
- ['figure', pTag],
- ['footer', pTag],
- ['form', pTag],
- ['header', pTag],
- ['hr', pTag],
- ['main', pTag],
- ['nav', pTag],
- ['ol', pTag],
- ['pre', pTag],
- ['section', pTag],
- ['table', pTag],
- ['ul', pTag],
- ['rt', rtpTags],
- ['rp', rtpTags],
- ['tbody', tableSectionTags],
- ['tfoot', tableSectionTags]
-])
-
-const voidElements = new Set([
- 'area',
- 'base',
- 'basefont',
- 'br',
- 'col',
- 'command',
- 'embed',
- 'frame',
- 'hr',
- 'img',
- 'input',
- 'isindex',
- 'keygen',
- 'link',
- 'meta',
- 'param',
- 'source',
- 'track',
- 'wbr'
-])
-
-const foreignContextElements = new Set(['math', 'svg'])
-
-const htmlIntegrationElements = new Set([
- 'mi',
- 'mo',
- 'mn',
- 'ms',
- 'mtext',
- 'annotation-xml',
- 'foreignobject',
- 'desc',
- 'title'
-])
-
-export interface ParserOptions {
- /**
- * Decode entities within the document.
- *
- * @default true
- */
- decodeEntities?: boolean
-}
-
-export interface Handler {
- onparserinit(parser: Parser): void
-
- /**
- * Resets the handler back to starting state
- */
- onreset(): void
-
- /**
- * Signals the handler that parsing is done
- */
- onend(): void
- onerror(error: Error): void
- onclosetag(name: string, isImplied: boolean): void
- onopentagname(name: string): void
- /**
- *
- * @param name Name of the attribute
- * @param value Value of the attribute.
- * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value.
- */
- onattribute(
- name: string,
- value: string,
- quote?: string | undefined | null
- ): void
- onopentag(
- name: string,
- attribs: { [s: string]: string },
- isImplied: boolean
- ): void
- ontext(data: string): void
- oncomment(data: string): void
- oncdatastart(): void
- oncdataend(): void
- oncommentend(): void
- onprocessinginstruction(name: string, data: string): void
-}
-
-const reNameEnd = /\s|\//
-
-export class Parser implements Callbacks {
- /** The start index of the last event. */
- public startIndex = 0
- /** The end index of the last event. */
- public endIndex = 0
- /**
- * Store the start index of the current open tag,
- * so we can update the start index for attributes.
- */
- private openTagStart = 0
-
- private tagname = ''
- private attribname = ''
- private attribvalue = ''
- private attribs: null | { [key: string]: string } = null
- private readonly stack: string[] = []
- /** Determines whether self-closing tags are recognized. */
- private readonly foreignContext: boolean[]
- private readonly cbs: Partial<Handler>
- private readonly tokenizer: Tokenizer
-
- private buffer: string = ''
-
- constructor(
- cbs?: Partial<Handler> | null,
- private readonly options: ParserOptions = {}
- ) {
- this.cbs = cbs ?? {}
- this.tokenizer = new Tokenizer(this.options, this)
- this.foreignContext = [false]
- this.cbs.onparserinit?.(this)
- }
-
- // Tokenizer event handlers
-
- /** @internal */
- ontext(start: number, endIndex: number): void {
- const data = this.getSlice(start, endIndex)
- this.endIndex = endIndex - 1
- this.cbs.ontext?.(data)
- this.startIndex = endIndex
- }
-
- /** @internal */
- ontextentity(cp: number, endIndex: number): void {
- this.endIndex = endIndex - 1
- this.cbs.ontext?.(fromCodePoint(cp))
- this.startIndex = endIndex
- }
-
- /** @internal */
- onopentagname(start: number, endIndex: number): void {
- this.emitOpenTag(this.getSlice(start, (this.endIndex = endIndex)))
- }
-
- private emitOpenTag(name: string) {
- this.openTagStart = this.startIndex
- this.tagname = name
-
- const impliesClose = openImpliesClose.get(name)
-
- if (impliesClose) {
- while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
- const element = this.stack.shift()!
- this.cbs.onclosetag?.(element, true)
- }
- }
- if (!voidElements.has(name)) {
- this.stack.unshift(name)
-
- if (foreignContextElements.has(name)) {
- this.foreignContext.unshift(true)
- } else if (htmlIntegrationElements.has(name)) {
- this.foreignContext.unshift(false)
- }
- }
- this.cbs.onopentagname?.(name)
- if (this.cbs.onopentag) this.attribs = {}
- }
-
- private endOpenTag(isImplied: boolean) {
- this.startIndex = this.openTagStart
-
- if (this.attribs) {
- this.cbs.onopentag?.(this.tagname, this.attribs, isImplied)
- this.attribs = null
- }
- if (this.cbs.onclosetag && voidElements.has(this.tagname)) {
- this.cbs.onclosetag(this.tagname, true)
- }
-
- this.tagname = ''
- }
-
- /** @internal */
- onopentagend(endIndex: number): void {
- this.endIndex = endIndex
- this.endOpenTag(false)
-
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- onclosetag(start: number, endIndex: number): void {
- const name = this.getSlice(start, (this.endIndex = endIndex))
-
- if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) {
- this.foreignContext.shift()
- }
-
- if (!voidElements.has(name)) {
- const pos = this.stack.indexOf(name)
- if (pos !== -1) {
- for (let index = 0; index <= pos; index++) {
- const element = this.stack.shift()!
- // We know the stack has sufficient elements.
- this.cbs.onclosetag?.(element, index !== pos)
- }
- } else if (name === 'p') {
- // Implicit open before close
- this.emitOpenTag('p')
- this.closeCurrentTag(true)
- }
- } else if (name === 'br') {
- // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
- this.cbs.onopentagname?.('br')
- this.cbs.onopentag?.('br', {}, true)
- this.cbs.onclosetag?.('br', false)
- }
-
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- onselfclosingtag(endIndex: number): void {
- this.endIndex = endIndex
- this.closeCurrentTag(false)
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- private closeCurrentTag(isOpenImplied: boolean) {
- const name = this.tagname
- this.endOpenTag(isOpenImplied)
-
- // Self-closing tags will be on the top of the stack
- if (this.stack[0] === name) {
- // If the opening tag isn't implied, the closing tag has to be implied.
- this.cbs.onclosetag?.(name, !isOpenImplied)
- this.stack.shift()
- }
- }
-
- /** @internal */
- onattribname(start: number, endIndex: number): void {
- this.attribname = this.getSlice((this.startIndex = start), endIndex)
- }
-
- /** @internal */
- onattribdata(start: number, endIndex: number): void {
- this.attribvalue += this.getSlice(start, endIndex)
- }
-
- /** @internal */
- onattribentity(cp: number): void {
- this.attribvalue += fromCodePoint(cp)
- }
-
- /** @internal */
- onattribend(quote: QuoteType, endIndex: number): void {
- this.endIndex = endIndex
-
- this.cbs.onattribute?.(
- this.attribname,
- this.attribvalue,
- quote === QuoteType.Double
- ? '"'
- : quote === QuoteType.Single
- ? "'"
- : quote === QuoteType.NoValue
- ? undefined
- : null
- )
-
- if (
- this.attribs &&
- !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)
- ) {
- this.attribs[this.attribname] = this.attribvalue
- }
- this.attribvalue = ''
- }
-
- private getInstructionName(value: string) {
- const index = value.search(reNameEnd)
- return index < 0 ? value : value.slice(0, index)
- }
-
- /** @internal */
- ondeclaration(start: number, endIndex: number): void {
- this.endIndex = endIndex
- const value = this.getSlice(start, endIndex)
-
- if (this.cbs.onprocessinginstruction) {
- const name = this.getInstructionName(value)
- this.cbs.onprocessinginstruction(`!${name}`, `!${value}`)
- }
-
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- onprocessinginstruction(start: number, endIndex: number): void {
- this.endIndex = endIndex
- const value = this.getSlice(start, endIndex)
-
- if (this.cbs.onprocessinginstruction) {
- const name = this.getInstructionName(value)
- this.cbs.onprocessinginstruction(`?${name}`, `?${value}`)
- }
-
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- oncomment(start: number, endIndex: number, offset: number): void {
- this.endIndex = endIndex
-
- this.cbs.oncomment?.(this.getSlice(start, endIndex - offset))
- this.cbs.oncommentend?.()
-
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- oncdata(start: number, endIndex: number, offset: number): void {
- this.endIndex = endIndex
- this.cbs.oncdatastart?.()
- this.cbs.ontext?.(this.getSlice(start, endIndex - offset))
- this.cbs.oncdataend?.()
- // Set `startIndex` for next node
- this.startIndex = endIndex + 1
- }
-
- /** @internal */
- onend(): void {
- if (this.cbs.onclosetag) {
- // Set the end index for all remaining tags
- this.endIndex = this.startIndex
- for (let index = 0; index < this.stack.length; index++) {
- this.cbs.onclosetag(this.stack[index], true)
- }
- }
- this.cbs.onend?.()
- }
-
- private getSlice(start: number, end: number) {
- return this.buffer.slice(start, end)
- }
-
- /**
- * Parses a chunk of data and calls the corresponding callbacks.
- *
- * @param input string to parse.
- */
- public parse(input: string): void {
- this.reset()
- this.buffer = input
- this.tokenizer.parse(input)
- }
-
- /**
- * Resets the parser to a blank state, ready to parse a new HTML document
- */
- public reset(): void {
- this.cbs.onreset?.()
- this.tokenizer.reset()
- this.tagname = ''
- this.attribname = ''
- this.attribs = null
- this.stack.length = 0
- this.startIndex = 0
- this.endIndex = 0
- this.cbs.onparserinit?.(this)
- this.foreignContext.length = 0
- this.foreignContext.unshift(false)
- }
-}
UpperZ = 0x5a, // "Z"
LowerZ = 0x7a, // "z"
LowerX = 0x78, // "x"
- OpeningSquareBracket = 0x5b // "["
+ OpeningSquareBracket = 0x5b, // "["
+ LowerV = 0x76, // "v"
+ Dot = 0x2e, // "."
+ Colon = 0x3a, // ":"
+ At = 0x40, // "@"
+ LeftSqaure = 91, // "["
+ RightSquare = 93 // "]"
}
/** All the states the tokenizer can be in. */
// Attributes
BeforeAttributeName,
InAttributeName,
+ InDirectiveName,
+ InDirectiveArg,
+ InDirectiveDynamicArg,
+ InDirectiveModifier,
AfterAttributeName,
BeforeAttributeValue,
InAttributeValueDq, // "
onattribend(quote: QuoteType, endIndex: number): void
onattribname(start: number, endIndex: number): void
+ ondirname(start: number, endIndex: number): void
+ ondirarg(start: number, endIndex: number): void
+ ondirmodifier(start: number, endIndex: number): void
+
oncomment(start: number, endIndex: number, endOffset: number): void
oncdata(start: number, endIndex: number, endOffset: number): void
} else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag
} else if (!isWhitespace(c)) {
+ this.enterAttribute(c)
+ }
+ }
+ private enterAttribute(c: number) {
+ if (
+ c === CharCodes.LowerV &&
+ this.buffer.charCodeAt(this.index + 1) === CharCodes.Dash
+ ) {
+ this.state = State.InDirectiveName
+ this.sectionStart = this.index
+ } else if (
+ c === CharCodes.Dot ||
+ c === CharCodes.Colon ||
+ c === CharCodes.At ||
+ c === CharCodes.Number
+ ) {
+ this.cbs.ondirname(this.index, this.index + 1)
+ this.state = State.InDirectiveArg
+ this.sectionStart = this.index + 1
+ } else {
this.state = State.InAttributeName
this.sectionStart = this.index
}
this.stateAfterAttributeName(c)
}
}
+ /**
+  * Tokenizer state: consuming a directive name (entered by `enterAttribute`
+  * when an attribute starts with `v-`).
+  *
+  * The name is reported through `ondirname` when it is terminated by `=`,
+  * by anything `isEndOfTagSection` accepts, by `:` (an argument follows) or
+  * by `.` (a modifier follows). Every other character is consumed silently.
+  *
+  * @param c Character code at the current index.
+  */
+ private stateInDirectiveName(c: number): void {
+   if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+     // Directive without argument or modifier: report the name, then let
+     // the shared after-attribute-name handling interpret this character.
+     this.cbs.ondirname(this.sectionStart, this.index)
+     this.sectionStart = this.index
+     this.state = State.AfterAttributeName
+     this.stateAfterAttributeName(c)
+     return
+   }
+
+   const startsArg = c === CharCodes.Colon
+   if (!startsArg && c !== CharCodes.Dot) {
+     // Still inside the name.
+     return
+   }
+
+   this.cbs.ondirname(this.sectionStart, this.index)
+   this.state = startsArg ? State.InDirectiveArg : State.InDirectiveModifier
+   // Skip the separator so the next section begins after it.
+   this.sectionStart = this.index + 1
+ }
+ /**
+  * Tokenizer state: consuming a directive argument (the section that follows
+  * `:` after a directive name, or a one-character shorthand prefix).
+  *
+  * The argument is reported through `ondirarg` when it is terminated by `=`,
+  * by anything `isEndOfTagSection` accepts, or by `.` (a modifier follows).
+  * `[` switches to the dynamic-argument state.
+  *
+  * @param c Character code at the current index.
+  */
+ private stateInDirectiveArg(c: number): void {
+   if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+     this.cbs.ondirarg(this.sectionStart, this.index)
+     this.sectionStart = this.index
+     this.state = State.AfterAttributeName
+     this.stateAfterAttributeName(c)
+     return
+   }
+
+   switch (c) {
+     // `LeftSqaure` is the enum member's spelling as declared in CharCodes.
+     case CharCodes.LeftSqaure:
+       // sectionStart is deliberately left alone, so the brackets end up
+       // inside the range that is eventually passed to `ondirarg`.
+       this.state = State.InDirectiveDynamicArg
+       break
+     case CharCodes.Dot:
+       this.cbs.ondirarg(this.sectionStart, this.index)
+       this.state = State.InDirectiveModifier
+       // Skip the dot so the modifier section begins after it.
+       this.sectionStart = this.index + 1
+       break
+   }
+ }
+ /**
+  * Tokenizer state: inside the `[...]` of a dynamic directive argument.
+  *
+  * `]` returns to `InDirectiveArg`, which keeps scanning with the same
+  * `sectionStart`, so the brackets stay part of the reported argument.
+  *
+  * If the attribute name ends (`=` or anything `isEndOfTagSection` accepts)
+  * before a `]` is seen, the argument is unterminated. In that case the
+  * text collected so far is reported through `ondirarg` and control is
+  * handed to the after-attribute-name handling, mirroring
+  * `stateInDirectiveArg`. Without this hand-off the state would only be
+  * left on some later `]`, swallowing the remainder of the tag (and
+  * possibly of the input).
+  *
+  * @param c Character code at the current index.
+  */
+ private stateInDynamicDirectiveArg(c: number): void {
+   if (c === CharCodes.RightSquare) {
+     this.state = State.InDirectiveArg
+   } else if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+     // TODO emit error (missing closing `]` of the dynamic argument)
+     this.cbs.ondirarg(this.sectionStart, this.index)
+     this.sectionStart = this.index
+     this.state = State.AfterAttributeName
+     this.stateAfterAttributeName(c)
+   }
+ }
+ /**
+  * Tokenizer state: consuming a directive modifier (a section that follows
+  * a `.` in a directive).
+  *
+  * Each modifier is reported through `ondirmodifier` when it is terminated
+  * by another `.` (a further modifier follows), by `=`, or by anything
+  * `isEndOfTagSection` accepts. Every other character is consumed silently.
+  *
+  * @param c Character code at the current index.
+  */
+ private stateInDirectiveModifier(c: number): void {
+   const endsAttrName = c === CharCodes.Eq || isEndOfTagSection(c)
+   if (!endsAttrName && c !== CharCodes.Dot) {
+     // Still inside the current modifier.
+     return
+   }
+
+   this.cbs.ondirmodifier(this.sectionStart, this.index)
+   if (endsAttrName) {
+     this.sectionStart = this.index
+     this.state = State.AfterAttributeName
+     this.stateAfterAttributeName(c)
+   } else {
+     // Another modifier follows; start it right after the dot.
+     this.sectionStart = this.index + 1
+   }
+ }
private stateAfterAttributeName(c: number): void {
if (c === CharCodes.Eq) {
this.state = State.BeforeAttributeValue
this.stateBeforeAttributeName(c)
} else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
- this.state = State.InAttributeName
- this.sectionStart = this.index
+ this.enterAttribute(c)
}
}
private stateBeforeAttributeValue(c: number): void {
this.stateInAttributeName(c)
break
}
+ case State.InDirectiveName: {
+ this.stateInDirectiveName(c)
+ break
+ }
+ case State.InDirectiveArg: {
+ this.stateInDirectiveArg(c)
+ break
+ }
+ case State.InDirectiveDynamicArg: {
+ this.stateInDynamicDirectiveArg(c)
+ break
+ }
+ case State.InDirectiveModifier: {
+ this.stateInDirectiveModifier(c)
+ break
+ }
case State.InCommentLike: {
this.stateInCommentLike(c)
break
this.state === State.BeforeAttributeValue ||
this.state === State.AfterAttributeName ||
this.state === State.InAttributeName ||
+ this.state === State.InDirectiveName ||
+ this.state === State.InDirectiveArg ||
+ this.state === State.InDirectiveDynamicArg ||
+ this.state === State.InDirectiveModifier ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueNq ||
onattribname(start, end) {
const name = getSlice(start, end)
- if (currentAttrs.has(name)) {
- // TODO emit error DUPLICATE_ATTRIBUTE
- } else {
- currentAttrs.add(name)
- }
if (!inVPre && isDirective(name)) {
// directive
const match = directiveParseRE.exec(name)!
currentAttrValue += fromCodePoint(codepoint)
},
onattribend(_quote, end) {
- if (currentElement) {
- if (currentAttrValue) {
- if (currentProp!.type === NodeTypes.ATTRIBUTE) {
- // assign value
- currentProp!.value = {
- type: NodeTypes.TEXT,
- content: currentAttrValue,
- // @ts-expect-error TODO
- loc: {}
- }
- } else {
- // directive
- currentProp!.exp = {
- type: NodeTypes.SIMPLE_EXPRESSION,
- content: currentAttrValue,
- isStatic: false,
- // Treat as non-constant by default. This can be potentially set to
- // other values by `transformExpression` to make it eligible for hoisting.
- constType: ConstantTypes.NOT_CONSTANT,
- // @ts-expect-error TODO
- loc: {}
- }
- }
- }
- currentProp!.loc.end = tokenizer.getPositionForIndex(end)
- currentElement.props.push(currentProp!)
- }
+ // NOTE(review): everything below is commented out, so this callback currently only resets `currentAttrValue`; no value/expression is assigned and nothing is pushed to `props`. The disabled duplicate check reads `name`, which is not a parameter of this callback - confirm where it should come from before re-enabling.
+ // TODO check duplicate
+ // if (currentAttrs.has(name)) {
+ // // emit error DUPLICATE_ATTRIBUTE
+ // } else {
+ // currentAttrs.add(name)
+ // }
+ // if (currentElement) {
+ // if (currentAttrValue) {
+ // if (currentProp!.type === NodeTypes.ATTRIBUTE) {
+ // // assign value
+ // currentProp!.value = {
+ // type: NodeTypes.TEXT,
+ // content: currentAttrValue,
+ // // @ts-expect-error TODO
+ // loc: {}
+ // }
+ // } else {
+ // // directive
+ // currentProp!.exp = {
+ // type: NodeTypes.SIMPLE_EXPRESSION,
+ // content: currentAttrValue,
+ // isStatic: false,
+ // // Treat as non-constant by default. This can be potentially set to
+ // // other values by `transformExpression` to make it eligible for hoisting.
+ // constType: ConstantTypes.NOT_CONSTANT,
+ // // @ts-expect-error TODO
+ // loc: {}
+ // }
+ // }
+ // }
+ // currentProp!.loc.end = tokenizer.getPositionForIndex(end)
+ // currentElement.props.push(currentProp!)
+ // }
currentAttrValue = ''
},
+ // Callback for a directive name; presumably receives the [start, end) index range the tokenizer passes to `ondirname` - confirm. Not implemented yet.
+ ondirname(start, end) {
+ // console.log('name ' + getSlice(start, end))
+ // NOTE(review): bare reference with no runtime effect - presumably a placeholder while the handler is unfinished; confirm before removing.
+ currentProp
+ },
+ // Callback for a directive argument; presumably receives the [start, end) index range the tokenizer passes to `ondirarg` - confirm. Empty stub for now.
+ ondirarg(start, end) {
+ // console.log('arg ' + getSlice(start, end))
+ },
+ // Callback for a single directive modifier; presumably receives the [start, end) index range the tokenizer passes to `ondirmodifier` (dot excluded) - confirm. Empty stub for now.
+ ondirmodifier(start, end) {
+ // console.log('.' + getSlice(start, end))
+ },
+
oncomment(start, end, offset) {
// TODO oncomment
},
onend() {
- const end = currentInput.length
+ const end = currentInput.length - 1
for (let index = 0; index < stack.length; index++) {
onCloseTag(stack[index], end)
}