wip: parse mode

author Evan You <yyx990803@gmail.com>

Fri, 17 Nov 2023 01:22:12 +0000 (09:22 +0800)

committer Evan You <yyx990803@gmail.com>

Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
author Evan You <yyx990803@gmail.com>
Fri, 17 Nov 2023 01:22:12 +0000 (09:22 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
diff --git a/packages/compiler-core/src/index.ts b/packages/compiler-core/src/index.ts

index 5ce3134fc13d870159146f06bbab7e86940e0d3c..c88844a5c06216577ae6711525b5eeedb204ee2f 100644 (file)
--- a/packages/compiler-core/src/index.ts
+++ b/packages/compiler-core/src/index.ts
@@ -71,4 +71,4 @@ export {
    CompilerDeprecationTypes
  } from './compat/compatConfig'
  
-// export { baseParse as newParse } from './parser/index'
+export { baseParse as newParse } from './parser/index'
diff --git a/packages/compiler-core/src/options.ts b/packages/compiler-core/src/options.ts

index abfba98e35c5126492fc32681cd91efc0a29bedc..3ab42625ea212e64ec3a5cfc407db8ce5092f2e9 100644 (file)
--- a/packages/compiler-core/src/options.ts
+++ b/packages/compiler-core/src/options.ts
@@ -17,6 +17,7 @@ export interface ErrorHandlingOptions {
  export interface ParserOptions
    extends ErrorHandlingOptions,
      CompilerCompatOptions {
+  parseMode?: 'base' | 'html' | 'sfc'
    /**
     * e.g. platform native elements, e.g. `<div>` for browsers
     */
diff --git a/packages/compiler-core/src/parse.ts b/packages/compiler-core/src/parse.ts

index c237239db9eb927b099a6bef82b168faac1fbd9c..d9cdb4e59a68f1f624ab92a1e3060ddc4c16bc59 100644 (file)
--- a/packages/compiler-core/src/parse.ts
+++ b/packages/compiler-core/src/parse.ts
@@ -40,6 +40,7 @@ import {
  } from './compat/compatConfig'
  
  type OptionalOptions =
+  | 'parseMode'
    | 'whitespace'
    | 'isNativeTag'
    | 'isBuiltInComponent'
diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts

index 20cb14340fa2dd61452c58fd6bf4ac7fdcfb58e8..96b63ead33d3b94ed5eac86feac006c22fed974f 100644 (file)
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -27,7 +27,7 @@ import {
    DecodingMode,
    htmlDecodeTree
  } from 'entities/lib/decode.js'
-import { Position } from '../ast'
+import { ElementNode, Position } from '../ast'
  
  export const enum ParseMode {
    BASE,
@@ -119,7 +119,9 @@ const enum State {
    SpecialStartSequence,
    InSpecialTag,
  
-  InEntity
+  InEntity,
+
+  InSFCRootTagName
  }
  
  /**
@@ -147,6 +149,14 @@ function isEndOfTagSection(c: number): boolean {
    return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c)
  }
  
+export function toCharCodes(str: string): Uint8Array {
+  const ret = new Uint8Array(str.length)
+  for (let i = 0; i < str.length; i++) {
+    ret[i] = str.charCodeAt(i)
+  }
+  return ret
+}
+
  export enum QuoteType {
    NoValue = 0,
    Unquoted = 1,
@@ -221,14 +231,20 @@ export default class Tokenizer {
  
    private readonly entityDecoder: EntityDecoder
  
-  constructor(private readonly cbs: Callbacks) {
+  constructor(
+    private readonly stack: ElementNode[],
+    private readonly cbs: Callbacks
+  ) {
      this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
        this.emitCodePoint(cp, consumed)
      )
    }
  
+  public mode = ParseMode.BASE
+
    public reset(): void {
      this.state = State.Text
+    this.mode = ParseMode.BASE
      this.buffer = ''
      this.sectionStart = 0
      this.index = 0
@@ -329,7 +345,7 @@ export default class Tokenizer {
      this.stateInTagName(c)
    }
  
-  /** Look for an end tag. For <title> tags, also decode entities. */
+  /** Look for an end tag. For <title> and <textarea>, also decode entities. */
    private stateInSpecialTag(c: number): void {
      if (this.sequenceIndex === this.currentSequence.length) {
        if (c === CharCodes.Gt || isWhitespace(c)) {
@@ -357,7 +373,8 @@ export default class Tokenizer {
      } else if (this.sequenceIndex === 0) {
        if (
          this.currentSequence === Sequences.TitleEnd ||
-        this.currentSequence === Sequences.TextareaEnd
+        (this.currentSequence === Sequences.TextareaEnd &&
+          !(this.mode === ParseMode.SFC && this.stack.length === 0))
        ) {
          // We have to parse entities in <title> and <textarea> tags.
          if (c === CharCodes.Amp) {
@@ -459,15 +476,26 @@ export default class Tokenizer {
        this.state = State.InProcessingInstruction
        this.sectionStart = this.index + 1
      } else if (isTagStartChar(c)) {
-      const lower = c | 0x20
        this.sectionStart = this.index
-      if (lower === Sequences.TitleEnd[2]) {
-        this.state = State.BeforeSpecialT
+      if (this.mode === ParseMode.BASE) {
+        // no special tags in base mode
+        this.state = State.InTagName
+      } else if (this.mode === ParseMode.SFC && this.stack.length === 0) {
+        // SFC mode + root level
+        // - everything except <template> is RAWTEXT
+        // - <template> with lang other than html is also RAWTEXT
+        this.state = State.InSFCRootTagName
        } else {
-        this.state =
-          lower === Sequences.ScriptEnd[2]
-            ? State.BeforeSpecialS
-            : State.InTagName
+        // HTML mode
+        // - <script>, <style> RAWTEXT
+        // - <title>, <textarea> RCDATA
+        const lower = c | 0x20
+        if (lower === 116 /* t */) {
+          this.state = State.BeforeSpecialT
+        } else {
+          this.state =
+            lower === 115 /* s */ ? State.BeforeSpecialS : State.InTagName
+        }
        }
      } else if (c === CharCodes.Slash) {
        this.state = State.BeforeClosingTagName
@@ -478,12 +506,25 @@ export default class Tokenizer {
    }
    private stateInTagName(c: number): void {
      if (isEndOfTagSection(c)) {
-      this.cbs.onopentagname(this.sectionStart, this.index)
-      this.sectionStart = -1
-      this.state = State.BeforeAttributeName
-      this.stateBeforeAttributeName(c)
+      this.handleTagName(c)
+    }
+  }
+  private stateInSFCRootTagName(c: number): void {
+    if (isEndOfTagSection(c)) {
+      const tag = this.buffer.slice(this.sectionStart, this.index)
+      if (tag !== 'template') {
+        this.isSpecial = true
+        this.currentSequence = toCharCodes(`</` + tag)
+      }
+      this.handleTagName(c)
      }
    }
+  private handleTagName(c: number) {
+    this.cbs.onopentagname(this.sectionStart, this.index)
+    this.sectionStart = -1
+    this.state = State.BeforeAttributeName
+    this.stateBeforeAttributeName(c)
+  }
    private stateBeforeClosingTagName(c: number): void {
      if (isWhitespace(c)) {
        // Ignore
@@ -830,6 +871,10 @@ export default class Tokenizer {
            this.stateInTagName(c)
            break
          }
+        case State.InSFCRootTagName: {
+          this.stateInSFCRootTagName(c)
+          break
+        }
          case State.InClosingTagName: {
            this.stateInClosingTagName(c)
            break
diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts

index f6df84a2654dcf573ff4bd9605f1b441bafd2d2c..7447f535045bd427864ac1f83440635883d9c821 100644 (file)
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -14,17 +14,23 @@ import {
    createRoot
  } from '../ast'
  import { ParserOptions } from '../options'
-import Tokenizer, { CharCodes, QuoteType, isWhitespace } from './Tokenizer'
+import Tokenizer, {
+  CharCodes,
+  ParseMode,
+  QuoteType,
+  isWhitespace,
+  toCharCodes
+} from './Tokenizer'
  import { CompilerCompatOptions } from '../compat/compatConfig'
  import { NO, extend } from '@vue/shared'
  import { defaultOnError, defaultOnWarn } from '../errors'
  import { isCoreComponent } from '../utils'
-import { TextModes } from '../parse'
  
  type OptionalOptions =
    | 'whitespace'
    | 'isNativeTag'
    | 'isBuiltInComponent'
+  | 'getTextMode'
    | keyof CompilerCompatOptions
  
  type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
@@ -43,9 +49,9 @@ const decodeMap: Record<string, string> = {
  }
  
  export const defaultParserOptions: MergedParserOptions = {
+  parseMode: 'base',
    delimiters: [`{{`, `}}`],
    getNamespace: () => Namespaces.HTML,
-  getTextMode: () => TextModes.DATA,
    isVoidTag: NO,
    isPreTag: NO,
    isCustomElement: NO,
@@ -73,7 +79,7 @@ let inVPre = false
  let currentElementIsVPreBoundary = false
  const stack: ElementNode[] = []
  
-const tokenizer = new Tokenizer({
+const tokenizer = new Tokenizer(stack, {
    ontext(start, end) {
      onText(getSlice(start, end), start, end)
    },
@@ -598,23 +604,24 @@ function reset() {
    stack.length = 0
  }
  
-function toCharCodes(str: string): Uint8Array {
-  const ret = new Uint8Array()
-  for (let i = 0; i < str.length; i++) {
-    ret[i] = str.charCodeAt(i)
-  }
-  return ret
-}
-
  export function baseParse(input: string, options?: ParserOptions): RootNode {
    reset()
+  currentInput = input
+  currentOptions = extend({}, defaultParserOptions, options)
+
+  tokenizer.mode =
+    currentOptions.parseMode === 'html'
+      ? ParseMode.HTML
+      : currentOptions.parseMode === 'sfc'
+      ? ParseMode.SFC
+      : ParseMode.BASE
+
    const delimiters = options?.delimiters
    if (delimiters) {
      tokenizer.delimiterOpen = toCharCodes(delimiters[0])
      tokenizer.delimiterClose = toCharCodes(delimiters[1])
    }
-  currentInput = input
-  currentOptions = extend({}, defaultParserOptions, options)
+
    const root = (currentRoot = createRoot([]))
    tokenizer.parse(currentInput)
    root.loc.end = tokenizer.getPos(input.length)
diff --git a/packages/vue/src/index.ts b/packages/vue/src/index.ts

index 8215be7476ea1d7936d9c80129fca9edae26246d..c00103bfee4df4a321afc63be90d670e7447c581 100644 (file)
--- a/packages/vue/src/index.ts
+++ b/packages/vue/src/index.ts
@@ -91,3 +91,5 @@ registerRuntimeCompiler(compileToFunction)
  
  export { compileToFunction as compile }
  export * from '@vue/runtime-dom'
+
+export { newParse } from '@vue/compiler-dom'
author	Evan You <yyx990803@gmail.com>
	Fri, 17 Nov 2023 01:22:12 +0000 (09:22 +0800)
committer	Evan You <yyx990803@gmail.com>
	Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/index.ts		patch | blob | blame | history
packages/compiler-core/src/options.ts		patch | blob | blame | history
packages/compiler-core/src/parse.ts		patch | blob | blame | history
packages/compiler-core/src/parser/Tokenizer.ts		patch | blob | blame | history
packages/compiler-core/src/parser/index.ts		patch | blob | blame | history
packages/vue/src/index.ts		patch | blob | blame | history