wip: strip xmlMode / htmlMode

author Evan You <yyx990803@gmail.com>

Sun, 12 Nov 2023 09:01:05 +0000 (17:01 +0800)

committer Evan You <yyx990803@gmail.com>

Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
author Evan You <yyx990803@gmail.com>
Sun, 12 Nov 2023 09:01:05 +0000 (17:01 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
diff --git a/packages/compiler-core/src/parser/Parser.ts b/packages/compiler-core/src/parser/Parser.ts

index fba7fde6c8d31449bccd26072dff12bda4854491..6695ddd5d7112bc54505e3cf610aeab45f3d2ce7 100644 (file)
--- a/packages/compiler-core/src/parser/Parser.ts
+++ b/packages/compiler-core/src/parser/Parser.ts
@@ -102,16 +102,6 @@ const htmlIntegrationElements = new Set([
  ])
  
  export interface ParserOptions {
-  /**
-   * Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment
-   * and if "empty" tags (eg. `<br>`) can have children.  If `false`, the content of special tags
-   * will be text only. For feeds and other XML content (documents that don't consist of HTML),
-   * set this to `true`.
-   *
-   * @default false
-   */
-  xmlMode?: boolean
-
    /**
     * Decode entities within the document.
     *
@@ -122,14 +112,14 @@ export interface ParserOptions {
    /**
     * If set to true, all tags will be lowercased.
     *
-   * @default !xmlMode
+   * @default true
     */
    lowerCaseTags?: boolean
  
    /**
     * If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed.
     *
-   * @default !xmlMode
+   * @default true
     */
    lowerCaseAttributeNames?: boolean
  
@@ -137,7 +127,7 @@ export interface ParserOptions {
     * If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled.
     * NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text.
     *
-   * @default xmlMode
+   * @default false
     */
    recognizeCDATA?: boolean
  
@@ -145,7 +135,7 @@ export interface ParserOptions {
     * If set to `true`, self-closing tags will trigger the onclosetag event even if xmlMode is not set to `true`.
     * NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized.
     *
-   * @default xmlMode
+   * @default false
     */
    recognizeSelfClosing?: boolean
  
@@ -218,8 +208,6 @@ export class Parser implements Callbacks {
    private readonly lowerCaseTagNames: boolean
    private readonly lowerCaseAttributeNames: boolean
    private readonly recognizeSelfClosing: boolean
-  /** We are parsing HTML. Inverse of the `xmlMode` option. */
-  private readonly htmlMode: boolean
    private readonly tokenizer: Tokenizer
  
    private readonly buffers: string[] = []
@@ -234,13 +222,11 @@ export class Parser implements Callbacks {
      private readonly options: ParserOptions = {}
    ) {
      this.cbs = cbs ?? {}
-    this.htmlMode = !this.options.xmlMode
-    this.lowerCaseTagNames = options.lowerCaseTags ?? this.htmlMode
-    this.lowerCaseAttributeNames =
-      options.lowerCaseAttributeNames ?? this.htmlMode
-    this.recognizeSelfClosing = options.recognizeSelfClosing ?? !this.htmlMode
+    this.lowerCaseTagNames = options.lowerCaseTags ?? true
+    this.lowerCaseAttributeNames = options.lowerCaseAttributeNames ?? true
+    this.recognizeSelfClosing = options.recognizeSelfClosing ?? false
      this.tokenizer = new (options.Tokenizer ?? Tokenizer)(this.options, this)
-    this.foreignContext = [!this.htmlMode]
+    this.foreignContext = [false]
      this.cbs.onparserinit?.(this)
    }
  
@@ -266,7 +252,7 @@ export class Parser implements Callbacks {
     * to specify your own additional void elements.
     */
    protected isVoidElement(name: string): boolean {
-    return this.htmlMode && voidElements.has(name)
+    return voidElements.has(name)
    }
  
    /** @internal */
@@ -286,7 +272,7 @@ export class Parser implements Callbacks {
      this.openTagStart = this.startIndex
      this.tagname = name
  
-    const impliesClose = this.htmlMode && openImpliesClose.get(name)
+    const impliesClose = openImpliesClose.get(name)
  
      if (impliesClose) {
        while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
@@ -297,12 +283,10 @@ export class Parser implements Callbacks {
      if (!this.isVoidElement(name)) {
        this.stack.unshift(name)
  
-      if (this.htmlMode) {
-        if (foreignContextElements.has(name)) {
-          this.foreignContext.unshift(true)
-        } else if (htmlIntegrationElements.has(name)) {
-          this.foreignContext.unshift(false)
-        }
+      if (foreignContextElements.has(name)) {
+        this.foreignContext.unshift(true)
+      } else if (htmlIntegrationElements.has(name)) {
+        this.foreignContext.unshift(false)
        }
      }
      this.cbs.onopentagname?.(name)
@@ -342,10 +326,7 @@ export class Parser implements Callbacks {
        name = name.toLowerCase()
      }
  
-    if (
-      this.htmlMode &&
-      (foreignContextElements.has(name) || htmlIntegrationElements.has(name))
-    ) {
+    if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) {
        this.foreignContext.shift()
      }
  
@@ -357,12 +338,12 @@ export class Parser implements Callbacks {
            // We know the stack has sufficient elements.
            this.cbs.onclosetag?.(element, index !== pos)
          }
-      } else if (this.htmlMode && name === 'p') {
+      } else if (name === 'p') {
          // Implicit open before close
          this.emitOpenTag('p')
          this.closeCurrentTag(true)
        }
-    } else if (this.htmlMode && name === 'br') {
+    } else if (name === 'br') {
        // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
        this.cbs.onopentagname?.('br')
        this.cbs.onopentag?.('br', {}, true)
@@ -497,7 +478,7 @@ export class Parser implements Callbacks {
      this.endIndex = endIndex
      const value = this.getSlice(start, endIndex - offset)
  
-    if (!this.htmlMode || this.options.recognizeCDATA) {
+    if (this.options.recognizeCDATA) {
        this.cbs.oncdatastart?.()
        this.cbs.ontext?.(value)
        this.cbs.oncdataend?.()
@@ -537,7 +518,7 @@ export class Parser implements Callbacks {
      this.cbs.onparserinit?.(this)
      this.buffers.length = 0
      this.foreignContext.length = 0
-    this.foreignContext.unshift(!this.htmlMode)
+    this.foreignContext.unshift(false)
      this.bufferOffset = 0
      this.writeIndex = 0
      this.ended = false
diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts

index 4478c813a93d0f22f1d794d4eb474d7fdd9e8aa3..b87cc52d8f67588198bdd09ef74919ca4b0d0655 100644 (file)
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -1,8 +1,7 @@
  import {
    EntityDecoder,
    DecodingMode,
-  htmlDecodeTree,
-  xmlDecodeTree
+  htmlDecodeTree
  } from 'entities/lib/decode.js'
  
  const enum CharCodes {
@@ -89,13 +88,6 @@ function isEndOfTagSection(c: number): boolean {
    return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c)
  }
  
-function isASCIIAlpha(c: number): boolean {
-  return (
-    (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
-    (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)
-  )
-}
-
  export enum QuoteType {
    NoValue = 0,
    Unquoted = 1,
@@ -156,22 +148,16 @@ export default class Tokenizer {
    /** The offset of the current buffer. */
    private offset = 0
  
-  private readonly xmlMode: boolean
    private readonly decodeEntities: boolean
    private readonly entityDecoder: EntityDecoder
  
    constructor(
-    {
-      xmlMode = false,
-      decodeEntities = true
-    }: { xmlMode?: boolean; decodeEntities?: boolean },
+    { decodeEntities = true }: { decodeEntities?: boolean },
      private readonly cbs: Callbacks
    ) {
-    this.xmlMode = xmlMode
      this.decodeEntities = decodeEntities
-    this.entityDecoder = new EntityDecoder(
-      xmlMode ? xmlDecodeTree : htmlDecodeTree,
-      (cp, consumed) => this.emitCodePoint(cp, consumed)
+    this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
+      this.emitCodePoint(cp, consumed)
      )
    }
  
@@ -358,12 +344,12 @@ export default class Tokenizer {
  
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
-   *
-   * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
-   * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar(c: number) {
-    return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c)
+    return (
+      (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
+      (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)
+    )
    }
  
    private startSpecial(sequence: Uint8Array, offset: number) {
@@ -383,11 +369,11 @@ export default class Tokenizer {
      } else if (this.isTagStartChar(c)) {
        const lower = c | 0x20
        this.sectionStart = this.index
-      if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
+      if (lower === Sequences.TitleEnd[2]) {
          this.startSpecial(Sequences.TitleEnd, 3)
        } else {
          this.state =
-          !this.xmlMode && lower === Sequences.ScriptEnd[2]
+          lower === Sequences.ScriptEnd[2]
              ? State.BeforeSpecialS
              : State.InTagName
        }
@@ -584,9 +570,7 @@ export default class Tokenizer {
      this.state = State.InEntity
      this.entityStart = this.index
      this.entityDecoder.startEntity(
-      this.xmlMode
-        ? DecodingMode.Strict
-        : this.baseState === State.Text || this.baseState === State.InSpecialTag
+      this.baseState === State.Text || this.baseState === State.InSpecialTag
          ? DecodingMode.Legacy
          : DecodingMode.Attribute
      )
author	Evan You <yyx990803@gmail.com>
	Sun, 12 Nov 2023 09:01:05 +0000 (17:01 +0800)
committer	Evan You <yyx990803@gmail.com>
	Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/parser/Parser.ts		patch | blob | blame | history
packages/compiler-core/src/parser/Tokenizer.ts		patch | blob | blame | history