*/
decodeEntities?: boolean
- /**
- * If set to true, all tags will be lowercased.
- *
- * @default true
- */
- lowerCaseTags?: boolean
-
- /**
- * If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed.
- *
- * @default true
- */
- lowerCaseAttributeNames?: boolean
-
/**
* If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled.
* NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text.
* @default false
*/
recognizeSelfClosing?: boolean
-
- /**
- * Allows the default tokenizer to be overwritten.
- */
- Tokenizer?: typeof Tokenizer
}
export interface Handler {
/** Determines whether self-closing tags are recognized. */
private readonly foreignContext: boolean[]
private readonly cbs: Partial<Handler>
- private readonly lowerCaseTagNames: boolean
- private readonly lowerCaseAttributeNames: boolean
private readonly recognizeSelfClosing: boolean
private readonly tokenizer: Tokenizer
- private readonly buffers: string[] = []
- private bufferOffset = 0
- /** The index of the last written buffer. Used when resuming after a `pause()`. */
- private writeIndex = 0
- /** Indicates whether the parser has finished running / `.end` has been called. */
- private ended = false
+ private buffer: string = ''
constructor(
cbs?: Partial<Handler> | null,
private readonly options: ParserOptions = {}
) {
this.cbs = cbs ?? {}
- this.lowerCaseTagNames = options.lowerCaseTags ?? true
- this.lowerCaseAttributeNames = options.lowerCaseAttributeNames ?? true
this.recognizeSelfClosing = options.recognizeSelfClosing ?? false
- this.tokenizer = new (options.Tokenizer ?? Tokenizer)(this.options, this)
+ this.tokenizer = new Tokenizer(this.options, this) // always the Tokenizer class in scope (no longer overridable via options.Tokenizer); the parser passes itself as the second argument — presumably as the callback receiver, confirm against Tokenizer
this.foreignContext = [false]
this.cbs.onparserinit?.(this)
}
this.startIndex = endIndex
}
- /**
- * Checks if the current tag is a void element. Override this if you want
- * to specify your own additional void elements.
- */
- protected isVoidElement(name: string): boolean {
- return voidElements.has(name)
- }
-
/** @internal */
onopentagname(start: number, endIndex: number): void {
- this.endIndex = endIndex
-
- let name = this.getSlice(start, endIndex)
-
- if (this.lowerCaseTagNames) {
- name = name.toLowerCase()
- }
-
- this.emitOpenTag(name)
+ this.emitOpenTag(this.getSlice(start, (this.endIndex = endIndex))) // stores endIndex as a side effect of building the slice arguments; the name is forwarded exactly as sliced, without the lowercasing the removed lines applied
}
private emitOpenTag(name: string) {
this.cbs.onclosetag?.(element, true)
}
}
- if (!this.isVoidElement(name)) {
+ if (!voidElements.has(name)) {
this.stack.unshift(name)
if (foreignContextElements.has(name)) {
this.cbs.onopentag?.(this.tagname, this.attribs, isImplied)
this.attribs = null
}
- if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
+ if (this.cbs.onclosetag && voidElements.has(this.tagname)) {
this.cbs.onclosetag(this.tagname, true)
}
/** @internal */
onclosetag(start: number, endIndex: number): void {
- this.endIndex = endIndex
-
- let name = this.getSlice(start, endIndex)
-
- if (this.lowerCaseTagNames) {
- name = name.toLowerCase()
- }
+ const name = this.getSlice(start, (this.endIndex = endIndex))
if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) {
this.foreignContext.shift()
}
- if (!this.isVoidElement(name)) {
+ if (!voidElements.has(name)) {
const pos = this.stack.indexOf(name)
if (pos !== -1) {
for (let index = 0; index <= pos; index++) {
/** @internal */
onattribname(start: number, endIndex: number): void {
- this.startIndex = start
- const name = this.getSlice(start, endIndex)
-
- this.attribname = this.lowerCaseAttributeNames ? name.toLowerCase() : name
+ this.attribname = this.getSlice((this.startIndex = start), endIndex) // records startIndex inline, then keeps the attribute name exactly as sliced (the removed lines lowercased it unless lowerCaseAttributeNames was false)
}
/** @internal */
private getInstructionName(value: string) {
const index = value.search(reNameEnd)
- let name = index < 0 ? value : value.substr(0, index)
-
- if (this.lowerCaseTagNames) {
- name = name.toLowerCase()
- }
-
- return name
+ return index < 0 ? value : value.slice(0, index) // no reNameEnd match (search returned -1): the whole value is the name; otherwise the name is the text before the first match. Casing is left untouched.
}
/** @internal */
this.cbs.onend?.()
}
+ private getSlice(start: number, end: number) {
+ return this.buffer.slice(start, end) // substring of the stored input from index start up to, but not including, end
+ }
+
+ /**
+ * Parses a complete input string from scratch and calls the corresponding callbacks.
+ *
+ * Resets the parser first, stores the input as the buffer that `getSlice`
+ * reads from, passes it to the tokenizer in a single `write` call and then
+ * ends the tokenizer, so each call handles one whole document rather than
+ * one chunk of a stream.
+ *
+ * @param input string to parse.
+ */
+ public parse(input: string): void {
+ this.reset()
+ this.buffer = input
+ this.tokenizer.write(input)
+ this.tokenizer.end()
+ }
+
/**
* Resets the parser to a blank state, ready to parse a new HTML document
*/
this.startIndex = 0
this.endIndex = 0
this.cbs.onparserinit?.(this)
- this.buffers.length = 0
this.foreignContext.length = 0
this.foreignContext.unshift(false)
- this.bufferOffset = 0
- this.writeIndex = 0
- this.ended = false
- }
-
- /**
- * Resets the parser, then parses a complete document and
- * pushes it to the handler.
- *
- * @param data Document to parse.
- */
- public parseComplete(data: string): void {
- this.reset()
- this.end(data)
- }
-
- private getSlice(start: number, end: number) {
- while (start - this.bufferOffset >= this.buffers[0].length) {
- this.shiftBuffer()
- }
-
- let slice = this.buffers[0].slice(
- start - this.bufferOffset,
- end - this.bufferOffset
- )
-
- while (end - this.bufferOffset > this.buffers[0].length) {
- this.shiftBuffer()
- slice += this.buffers[0].slice(0, end - this.bufferOffset)
- }
-
- return slice
- }
-
- private shiftBuffer(): void {
- this.bufferOffset += this.buffers[0].length
- this.writeIndex--
- this.buffers.shift()
- }
-
- /**
- * Parses a chunk of data and calls the corresponding callbacks.
- *
- * @param chunk Chunk to parse.
- */
- public write(chunk: string): void {
- if (this.ended) {
- this.cbs.onerror?.(new Error('.write() after done!'))
- return
- }
-
- this.buffers.push(chunk)
- if (this.tokenizer.running) {
- this.tokenizer.write(chunk)
- this.writeIndex++
- }
- }
-
- /**
- * Parses the end of the buffer and clears the stack, calls onend.
- *
- * @param chunk Optional final chunk to parse.
- */
- public end(chunk?: string): void {
- if (this.ended) {
- this.cbs.onerror?.(new Error('.end() after done!'))
- return
- }
-
- if (chunk) this.write(chunk)
- this.ended = true
- this.tokenizer.end()
- }
-
- /**
- * Pauses parsing. The parser won't emit events until `resume` is called.
- */
- public pause(): void {
- this.tokenizer.pause()
- }
-
- /**
- * Resumes parsing after `pause` was called.
- */
- public resume(): void {
- this.tokenizer.resume()
-
- while (this.tokenizer.running && this.writeIndex < this.buffers.length) {
- this.tokenizer.write(this.buffers[this.writeIndex++])
- }
-
- if (this.ended) this.tokenizer.end()
- }
-
- /**
- * Alias of `write`, for backwards compatibility.
- *
- * @param chunk Chunk to parse.
- * @deprecated
- */
- public parseChunk(chunk: string): void {
- this.write(chunk)
- }
- /**
- * Alias of `end`, for backwards compatibility.
- *
- * @param chunk Optional final chunk to parse.
- * @deprecated
- */
- public done(chunk?: string): void {
- this.end(chunk)
}
}