])
export interface ParserOptions {
- /**
- * Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment
- * and if "empty" tags (eg. `<br>`) can have children. If `false`, the content of special tags
- * will be text only. For feeds and other XML content (documents that don't consist of HTML),
- * set this to `true`.
- *
- * @default false
- */
- xmlMode?: boolean
-
/**
* Decode entities within the document.
*
/**
* If set to true, all tags will be lowercased.
*
- * @default !xmlMode
+ * @default true
*/
lowerCaseTags?: boolean
/**
* If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed.
*
- * @default !xmlMode
+ * @default true
*/
lowerCaseAttributeNames?: boolean
* If set to `true`, CDATA sections will be recognized as text.
*
- * @default xmlMode
+ * @default false
*/
recognizeCDATA?: boolean
* If set to `true`, self-closing tags will trigger the onclosetag event.
*
- * @default xmlMode
+ * @default false
*/
recognizeSelfClosing?: boolean
private readonly lowerCaseTagNames: boolean
private readonly lowerCaseAttributeNames: boolean
private readonly recognizeSelfClosing: boolean
- /** We are parsing HTML. Inverse of the `xmlMode` option. */
- private readonly htmlMode: boolean
private readonly tokenizer: Tokenizer
private readonly buffers: string[] = []
private readonly options: ParserOptions = {}
) {
this.cbs = cbs ?? {}
- this.htmlMode = !this.options.xmlMode
- this.lowerCaseTagNames = options.lowerCaseTags ?? this.htmlMode
- this.lowerCaseAttributeNames =
- options.lowerCaseAttributeNames ?? this.htmlMode
- this.recognizeSelfClosing = options.recognizeSelfClosing ?? !this.htmlMode
+ this.lowerCaseTagNames = options.lowerCaseTags ?? true
+ this.lowerCaseAttributeNames = options.lowerCaseAttributeNames ?? true
+ this.recognizeSelfClosing = options.recognizeSelfClosing ?? false
this.tokenizer = new (options.Tokenizer ?? Tokenizer)(this.options, this)
- this.foreignContext = [!this.htmlMode]
+ this.foreignContext = [false]
this.cbs.onparserinit?.(this)
}
* to specify your own additional void elements.
*/
protected isVoidElement(name: string): boolean {
- return this.htmlMode && voidElements.has(name)
+ return voidElements.has(name)
}
/** @internal */
this.openTagStart = this.startIndex
this.tagname = name
- const impliesClose = this.htmlMode && openImpliesClose.get(name)
+ const impliesClose = openImpliesClose.get(name)
if (impliesClose) {
while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
if (!this.isVoidElement(name)) {
this.stack.unshift(name)
- if (this.htmlMode) {
- if (foreignContextElements.has(name)) {
- this.foreignContext.unshift(true)
- } else if (htmlIntegrationElements.has(name)) {
- this.foreignContext.unshift(false)
- }
+ if (foreignContextElements.has(name)) {
+ this.foreignContext.unshift(true)
+ } else if (htmlIntegrationElements.has(name)) {
+ this.foreignContext.unshift(false)
}
}
this.cbs.onopentagname?.(name)
name = name.toLowerCase()
}
- if (
- this.htmlMode &&
- (foreignContextElements.has(name) || htmlIntegrationElements.has(name))
- ) {
+ if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) {
this.foreignContext.shift()
}
// We know the stack has sufficient elements.
this.cbs.onclosetag?.(element, index !== pos)
}
- } else if (this.htmlMode && name === 'p') {
+ } else if (name === 'p') {
// Implicit open before close
this.emitOpenTag('p')
this.closeCurrentTag(true)
}
- } else if (this.htmlMode && name === 'br') {
+ } else if (name === 'br') {
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
this.cbs.onopentagname?.('br')
this.cbs.onopentag?.('br', {}, true)
this.endIndex = endIndex
const value = this.getSlice(start, endIndex - offset)
- if (!this.htmlMode || this.options.recognizeCDATA) {
+ if (this.options.recognizeCDATA) {
this.cbs.oncdatastart?.()
this.cbs.ontext?.(value)
this.cbs.oncdataend?.()
this.cbs.onparserinit?.(this)
this.buffers.length = 0
this.foreignContext.length = 0
- this.foreignContext.unshift(!this.htmlMode)
+ this.foreignContext.unshift(false)
this.bufferOffset = 0
this.writeIndex = 0
this.ended = false
import {
EntityDecoder,
DecodingMode,
- htmlDecodeTree,
- xmlDecodeTree
+ htmlDecodeTree
} from 'entities/lib/decode.js'
const enum CharCodes {
return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c)
}
-function isASCIIAlpha(c: number): boolean {
- return (
- (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
- (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)
- )
-}
-
export enum QuoteType {
NoValue = 0,
Unquoted = 1,
/** The offset of the current buffer. */
private offset = 0
- private readonly xmlMode: boolean
private readonly decodeEntities: boolean
private readonly entityDecoder: EntityDecoder
constructor(
- {
- xmlMode = false,
- decodeEntities = true
- }: { xmlMode?: boolean; decodeEntities?: boolean },
+ { decodeEntities = true }: { decodeEntities?: boolean },
private readonly cbs: Callbacks
) {
- this.xmlMode = xmlMode
this.decodeEntities = decodeEntities
- this.entityDecoder = new EntityDecoder(
- xmlMode ? xmlDecodeTree : htmlDecodeTree,
- (cp, consumed) => this.emitCodePoint(cp, consumed)
+ this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
+ this.emitCodePoint(cp, consumed)
)
}
/**
* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
- *
- * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
- * We allow anything that wouldn't end the tag.
*/
private isTagStartChar(c: number) {
- return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c)
+ return (
+ (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
+ (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)
+ )
}
private startSpecial(sequence: Uint8Array, offset: number) {
} else if (this.isTagStartChar(c)) {
const lower = c | 0x20
this.sectionStart = this.index
- if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
+ if (lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3)
} else {
this.state =
- !this.xmlMode && lower === Sequences.ScriptEnd[2]
+ lower === Sequences.ScriptEnd[2]
? State.BeforeSpecialS
: State.InTagName
}
this.state = State.InEntity
this.entityStart = this.index
this.entityDecoder.startEntity(
- this.xmlMode
- ? DecodingMode.Strict
- : this.baseState === State.Text || this.baseState === State.InSpecialTag
+ this.baseState === State.Text || this.baseState === State.InSpecialTag
? DecodingMode.Legacy
: DecodingMode.Attribute
)