wip: treat template with preprocessor as plain text

author Evan You <yyx990803@gmail.com>

Mon, 20 Nov 2023 14:49:16 +0000 (22:49 +0800)

committer Evan You <yyx990803@gmail.com>

Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
author Evan You <yyx990803@gmail.com>
Mon, 20 Nov 2023 14:49:16 +0000 (22:49 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts

index 3fe2656a759638dc0d573341e0721098cda39d43..c0767c1ef51a256ed3aadcec9376926f374f9a44 100644 (file)
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -129,7 +129,7 @@ const enum State {
    BeforeSpecialS, // Decide if we deal with `<script` or `<style`
    BeforeSpecialT, // Decide if we deal with `<title` or `<textarea`
    SpecialStartSequence,
-  InSpecialTag,
+  InRCDATA,
  
    InEntity,
  
@@ -245,6 +245,11 @@ export default class Tokenizer {
  
    private readonly entityDecoder?: EntityDecoder
  
+  public mode = ParseMode.BASE
+  public get inSFCRoot() {
+    return this.mode === ParseMode.SFC && this.stack.length === 0
+  }
+
    constructor(
      private readonly stack: ElementNode[],
      private readonly cbs: Callbacks
@@ -256,8 +261,6 @@ export default class Tokenizer {
      }
    }
  
-  public mode = ParseMode.BASE
-
    public reset(): void {
      this.state = State.Text
      this.mode = ParseMode.BASE
@@ -328,8 +331,8 @@ export default class Tokenizer {
          this.delimiterIndex++
        }
      } else if (this.inRCDATA) {
-      this.state = State.InSpecialTag
-      this.stateInSpecialTag(c)
+      this.state = State.InRCDATA
+      this.stateInRCDATA(c)
      } else {
        this.state = State.Text
        this.stateText(c)
@@ -349,7 +352,7 @@ export default class Tokenizer {
        if (this.delimiterIndex === this.delimiterClose.length - 1) {
          this.cbs.oninterpolation(this.sectionStart, this.index + 1)
          if (this.inRCDATA) {
-          this.state = State.InSpecialTag
+          this.state = State.InRCDATA
          } else {
            this.state = State.Text
          }
@@ -386,7 +389,7 @@ export default class Tokenizer {
    }
  
    /** Look for an end tag. For <title> and <textarea>, also decode entities. */
-  private stateInSpecialTag(c: number): void {
+  private stateInRCDATA(c: number): void {
      if (this.sequenceIndex === this.currentSequence.length) {
        if (c === CharCodes.Gt || isWhitespace(c)) {
          const endOfText = this.index - this.currentSequence.length
@@ -413,8 +416,7 @@ export default class Tokenizer {
      } else if (this.sequenceIndex === 0) {
        if (
          this.currentSequence === Sequences.TitleEnd ||
-        (this.currentSequence === Sequences.TextareaEnd &&
-          !(this.mode === ParseMode.SFC && this.stack.length === 0))
+        (this.currentSequence === Sequences.TextareaEnd && !this.inSFCRoot)
        ) {
          // We have to parse entities in <title> and <textarea> tags.
          if (!__BROWSER__ && c === CharCodes.Amp) {
@@ -507,10 +509,14 @@ export default class Tokenizer {
    }
  
    private startSpecial(sequence: Uint8Array, offset: number) {
+    this.enterRCDATA(sequence, offset)
+    this.state = State.SpecialStartSequence
+  }
+
+  public enterRCDATA(sequence: Uint8Array, offset: number) {
      this.inRCDATA = true
      this.currentSequence = sequence
      this.sequenceIndex = offset
-    this.state = State.SpecialStartSequence
    }
  
    private stateBeforeTagName(c: number): void {
@@ -525,7 +531,7 @@ export default class Tokenizer {
        if (this.mode === ParseMode.BASE) {
          // no special tags in base mode
          this.state = State.InTagName
-      } else if (this.mode === ParseMode.SFC && this.stack.length === 0) {
+      } else if (this.inSFCRoot) {
          // SFC mode + root level
          // - everything except <template> is RAWTEXT
          // - <template> with lang other than html is also RAWTEXT
@@ -560,8 +566,7 @@ export default class Tokenizer {
      if (isEndOfTagSection(c)) {
        const tag = this.buffer.slice(this.sectionStart, this.index)
        if (tag !== 'template') {
-        this.inRCDATA = true
-        this.currentSequence = toCharCodes(`</` + tag)
+        this.enterRCDATA(toCharCodes(`</` + tag), 0)
        }
        this.handleTagName(c)
      }
@@ -603,8 +608,7 @@ export default class Tokenizer {
      if (c === CharCodes.Gt) {
        this.cbs.onopentagend(this.index)
        if (this.inRCDATA) {
-        this.state = State.InSpecialTag
-        this.sequenceIndex = 0
+        this.state = State.InRCDATA
        } else {
          this.state = State.Text
        }
@@ -827,7 +831,7 @@ export default class Tokenizer {
        this.state = State.InEntity
        this.entityStart = this.index
        this.entityDecoder!.startEntity(
-        this.baseState === State.Text || this.baseState === State.InSpecialTag
+        this.baseState === State.Text || this.baseState === State.InRCDATA
            ? DecodingMode.Legacy
            : DecodingMode.Attribute
        )
@@ -885,8 +889,8 @@ export default class Tokenizer {
            this.stateSpecialStartSequence(c)
            break
          }
-        case State.InSpecialTag: {
-          this.stateInSpecialTag(c)
+        case State.InRCDATA: {
+          this.stateInRCDATA(c)
            break
          }
          case State.CDATASequence: {
@@ -1016,7 +1020,7 @@ export default class Tokenizer {
      if (this.sectionStart !== this.index) {
        if (
          this.state === State.Text ||
-        (this.state === State.InSpecialTag && this.sequenceIndex === 0)
+        (this.state === State.InRCDATA && this.sequenceIndex === 0)
        ) {
          this.cbs.ontext(this.sectionStart, this.index)
          this.sectionStart = this.index
@@ -1083,10 +1087,7 @@ export default class Tokenizer {
  
    private emitCodePoint(cp: number, consumed: number): void {
      if (!__BROWSER__) {
-      if (
-        this.baseState !== State.Text &&
-        this.baseState !== State.InSpecialTag
-      ) {
+      if (this.baseState !== State.Text && this.baseState !== State.InRCDATA) {
          if (this.sectionStart < this.entityStart) {
            this.cbs.onattribdata(this.sectionStart, this.entityStart)
          }
diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts

index 8bdcfb38e3fe488e5008abf1758ff430bbb5960c..99c7e0879cd56e13d2babd9f0aa11038590ef238 100644 (file)
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -111,10 +111,9 @@ const tokenizer = new Tokenizer(stack, {
    onopentagname(start, end) {
      const name = getSlice(start, end)
      // in SFC mode, root-level tags locations are for its inner content.
-    const startIndex =
-      tokenizer.mode === ParseMode.SFC && stack.length === 0
-        ? end + fastForward(end, CharCodes.Gt) + 1
-        : start - 1
+    const startIndex = tokenizer.inSFCRoot
+      ? end + fastForward(end, CharCodes.Gt) + 1
+      : start - 1
      currentElement = {
        type: NodeTypes.ELEMENT,
        tag: name,
@@ -296,6 +295,16 @@ const tokenizer = new Tokenizer(stack, {
                  ? getLoc(currentAttrStartIndex, currentAttrEndIndex)
                  : getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
            }
+          if (
+            currentAttrValue &&
+            tokenizer.inSFCRoot &&
+            currentElement.tag === 'template' &&
+            currentProp.name === 'lang'
+          ) {
+            // SFC root template with preprocessor lang, force tokenizer to
+            // RCDATA mode
+            tokenizer.enterRCDATA(toCharCodes(`</template`), 0)
+          }
          } else {
            // directive
            currentProp.rawExp = currentAttrValue
@@ -464,7 +473,7 @@ function onText(content: string, start: number, end: number) {
  
  function onCloseTag(el: ElementNode, end: number) {
    // attach end position
-  if (tokenizer.mode === ParseMode.SFC && stack.length === 0) {
+  if (tokenizer.inSFCRoot) {
      // SFC root tag, end position should be inner end
      if (el.children.length) {
        el.loc.end = extend({}, el.children[el.children.length - 1].loc.end)
diff --git a/packages/compiler-sfc/__tests__/parse.spec.ts b/packages/compiler-sfc/__tests__/parse.spec.ts

index 65a318c0bb07f98894ab33ac591488ba8a2ef0c6..6ae3427c74a095127bc0be4d72da8e5650ed3a5e 100644 (file)
--- a/packages/compiler-sfc/__tests__/parse.spec.ts
+++ b/packages/compiler-sfc/__tests__/parse.spec.ts
@@ -203,13 +203,15 @@ h1 { color: red }
    })
  
    // #1120
-  test('alternative template lang should be treated as plain text', () => {
-    const content = `p(v-if="1 < 2") test`
+  test('template with preprocessor lang should be treated as plain text', () => {
+    const content = `p(v-if="1 < 2") test <div/>`
      const { descriptor, errors } = parse(
        `<template lang="pug">` + content + `</template>`
      )
      expect(errors.length).toBe(0)
      expect(descriptor.template!.content).toBe(content)
+    // should not attempt to parse the content
+    expect(descriptor.template!.ast.children.length).toBe(1)
    })
  
    //#2566
diff --git a/packages/compiler-sfc/src/compileTemplate.ts b/packages/compiler-sfc/src/compileTemplate.ts

index 8c5e492fabd7d8a371af501a4122f8cf954306d5..d8c1059087f67480b5731f38b8e0aa9ff893b0de 100644 (file)
--- a/packages/compiler-sfc/src/compileTemplate.ts
+++ b/packages/compiler-sfc/src/compileTemplate.ts
@@ -132,7 +132,8 @@ export function compileTemplate(
      try {
        return doCompileTemplate({
          ...options,
-        source: preprocess(options, preprocessor)
+        source: preprocess(options, preprocessor),
+        ast: undefined // invalidate AST if template goes through preprocessor
        })
      } catch (e: any) {
        return {
diff --git a/packages/compiler-sfc/src/parse.ts b/packages/compiler-sfc/src/parse.ts

index 22fde21550a85131e19879b6a87c243a2c058b9a..3330254d24192ac0dc4e3253ef6594fb7b5fbe9f 100644 (file)
--- a/packages/compiler-sfc/src/parse.ts
+++ b/packages/compiler-sfc/src/parse.ts
@@ -245,8 +245,10 @@ export function parse(
          )
        }
      }
-    // no need to genMap for template as its AST already accounts for the
-    // position in the SFC
+    // only genMap for template when it needs preprocessor
+    if (descriptor.template && descriptor.template.lang) {
+      genMap(descriptor.template)
+    }
      genMap(descriptor.script)
      descriptor.styles.forEach(genMap)
      descriptor.customBlocks.forEach(genMap)
author	Evan You <yyx990803@gmail.com>
	Mon, 20 Nov 2023 14:49:16 +0000 (22:49 +0800)
committer	Evan You <yyx990803@gmail.com>
	Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/parser/Tokenizer.ts		patch | blob | blame | history
packages/compiler-core/src/parser/index.ts		patch | blob | blame | history
packages/compiler-sfc/__tests__/parse.spec.ts		patch | blob | blame | history
packages/compiler-sfc/src/compileTemplate.ts		patch | blob | blame | history
packages/compiler-sfc/src/parse.ts		patch | blob | blame | history