wip: save

author Evan You <yyx990803@gmail.com>

Mon, 13 Nov 2023 17:14:33 +0000 (01:14 +0800)

committer Evan You <yyx990803@gmail.com>

Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
author Evan You <yyx990803@gmail.com>
Mon, 13 Nov 2023 17:14:33 +0000 (01:14 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts

index cece336fc9940a2b86f1f6a892bd6c53cac4aac4..4dedf9a465c4b5eee69e2b241ab01d482d7f8569 100644 (file)
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -171,6 +171,11 @@ export default class Tokenizer {
    private readonly decodeEntities: boolean
    private readonly entityDecoder: EntityDecoder
  
+  public line = 1
+  public column = 1
+  public startLine = 1
+  public startColumn = 1
+
    constructor(
      { decodeEntities = true }: { decodeEntities?: boolean },
      private readonly cbs: Callbacks
@@ -184,12 +189,22 @@ export default class Tokenizer {
    public reset(): void {
      this.state = State.Text
      this.buffer = ''
-    this.sectionStart = 0
+    this.recordStart(0)
      this.index = 0
+    this.line = 1
+    this.column = 1
+    this.startLine = 1
+    this.startColumn = 1
      this.baseState = State.Text
      this.currentSequence = undefined!
    }
  
+  private recordStart(start = this.index) {
+    this.sectionStart = start
+    this.startLine = this.line
+    this.startColumn = this.column + (start - this.index)
+  }
+
    private stateText(c: number): void {
      if (
        c === CharCodes.Lt ||
@@ -199,7 +214,7 @@ export default class Tokenizer {
          this.cbs.ontext(this.sectionStart, this.index)
        }
        this.state = State.BeforeTagName
-      this.sectionStart = this.index
+      this.recordStart()
      } else if (this.decodeEntities && c === CharCodes.Amp) {
        this.startEntity()
      }
@@ -242,7 +257,7 @@ export default class Tokenizer {
          }
  
          this.isSpecial = false
-        this.sectionStart = endOfText + 2 // Skip over the `</`
+        this.recordStart(endOfText + 2) // Skip over the `</`
          this.stateInClosingTagName(c)
          return // We are done; skip the rest of the function.
        }
@@ -274,7 +289,7 @@ export default class Tokenizer {
          this.state = State.InCommentLike
          this.currentSequence = Sequences.CdataEnd
          this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
        }
      } else {
        this.sequenceIndex = 0
@@ -325,7 +340,7 @@ export default class Tokenizer {
          }
  
          this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
          this.state = State.Text
        }
      } else if (this.sequenceIndex === 0) {
@@ -359,13 +374,13 @@ export default class Tokenizer {
    private stateBeforeTagName(c: number): void {
      if (c === CharCodes.ExclamationMark) {
        this.state = State.BeforeDeclaration
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else if (c === CharCodes.Questionmark) {
        this.state = State.InProcessingInstruction
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else if (this.isTagStartChar(c)) {
        const lower = c | 0x20
-      this.sectionStart = this.index
+      this.recordStart()
        if (lower === Sequences.TitleEnd[2]) {
          this.startSpecial(Sequences.TitleEnd, 3)
        } else {
@@ -384,7 +399,7 @@ export default class Tokenizer {
    private stateInTagName(c: number): void {
      if (isEndOfTagSection(c)) {
        this.cbs.onopentagname(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
      }
@@ -398,13 +413,13 @@ export default class Tokenizer {
        this.state = this.isTagStartChar(c)
          ? State.InClosingTagName
          : State.InSpecialComment
-      this.sectionStart = this.index
+      this.recordStart()
      }
    }
    private stateInClosingTagName(c: number): void {
      if (c === CharCodes.Gt || isWhitespace(c)) {
        this.cbs.onclosetag(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
        this.state = State.AfterClosingTagName
        this.stateAfterClosingTagName(c)
      }
@@ -413,7 +428,7 @@ export default class Tokenizer {
      // Skip everything until ">"
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      }
    }
    private stateBeforeAttributeName(c: number): void {
@@ -425,19 +440,19 @@ export default class Tokenizer {
        } else {
          this.state = State.Text
        }
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else if (c === CharCodes.Slash) {
        this.state = State.InSelfClosingTag
      } else if (!isWhitespace(c)) {
        this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
      }
    }
    private stateInSelfClosingTag(c: number): void {
      if (c === CharCodes.Gt) {
        this.cbs.onselfclosingtag(this.index)
        this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
        this.isSpecial = false // Reset special state, in case of self-closing special tags
      } else if (!isWhitespace(c)) {
        this.state = State.BeforeAttributeName
@@ -447,7 +462,7 @@ export default class Tokenizer {
    private stateInAttributeName(c: number): void {
      if (c === CharCodes.Eq || isEndOfTagSection(c)) {
        this.cbs.onattribname(this.sectionStart, this.index)
-      this.sectionStart = this.index
+      this.recordStart()
        this.state = State.AfterAttributeName
        this.stateAfterAttributeName(c)
      }
@@ -457,24 +472,24 @@ export default class Tokenizer {
        this.state = State.BeforeAttributeValue
      } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
        this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.sectionStart = -1
+      this.recordStart(-1)
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
      } else if (!isWhitespace(c)) {
        this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
        this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
      }
    }
    private stateBeforeAttributeValue(c: number): void {
      if (c === CharCodes.DoubleQuote) {
        this.state = State.InAttributeValueDq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else if (c === CharCodes.SingleQuote) {
        this.state = State.InAttributeValueSq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else if (!isWhitespace(c)) {
-      this.sectionStart = this.index
+      this.recordStart()
        this.state = State.InAttributeValueNq
        this.stateInAttributeValueNoQuotes(c) // Reconsume token
      }
@@ -482,7 +497,7 @@ export default class Tokenizer {
    private handleInAttributeValue(c: number, quote: number) {
      if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
        this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
        this.cbs.onattribend(
          quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
          this.index + 1
@@ -501,7 +516,7 @@ export default class Tokenizer {
    private stateInAttributeValueNoQuotes(c: number): void {
      if (isWhitespace(c) || c === CharCodes.Gt) {
        this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
        this.cbs.onattribend(QuoteType.Unquoted, this.index)
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
@@ -522,14 +537,14 @@ export default class Tokenizer {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.cbs.ondeclaration(this.sectionStart, this.index)
        this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      }
    }
    private stateInProcessingInstruction(c: number): void {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.cbs.onprocessinginstruction(this.sectionStart, this.index)
        this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      }
    }
    private stateBeforeComment(c: number): void {
@@ -538,7 +553,7 @@ export default class Tokenizer {
        this.currentSequence = Sequences.CommentEnd
        // Allow short comments (eg. <!-->)
        this.sequenceIndex = 2
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      } else {
        this.state = State.InDeclaration
      }
@@ -547,7 +562,7 @@ export default class Tokenizer {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.cbs.oncomment(this.sectionStart, this.index, 0)
        this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      }
    }
    private stateBeforeSpecialS(c: number): void {
@@ -701,6 +716,13 @@ export default class Tokenizer {
          }
        }
        this.index++
+      // line / column handling
+      if (c === CharCodes.NewLine) {
+        this.line++
+        this.column = 1
+      } else {
+        this.column++
+      }
      }
      this.cleanup()
      this.finish()
@@ -717,14 +739,14 @@ export default class Tokenizer {
          (this.state === State.InSpecialTag && this.sequenceIndex === 0)
        ) {
          this.cbs.ontext(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
        } else if (
          this.state === State.InAttributeValueDq ||
          this.state === State.InAttributeValueSq ||
          this.state === State.InAttributeValueNq
        ) {
          this.cbs.onattribdata(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
        }
      }
    }
@@ -783,7 +805,7 @@ export default class Tokenizer {
        if (this.sectionStart < this.entityStart) {
          this.cbs.onattribdata(this.sectionStart, this.entityStart)
        }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
        this.index = this.sectionStart - 1
  
        this.cbs.onattribentity(cp)
@@ -791,7 +813,7 @@ export default class Tokenizer {
        if (this.sectionStart < this.entityStart) {
          this.cbs.ontext(this.sectionStart, this.entityStart)
        }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
        this.index = this.sectionStart - 1
  
        this.cbs.ontextentity(cp, this.sectionStart)
diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts

index d12af24e39b4f1c369812815f0ba957126c96053..2c7cf540d07d4370b7e8ca41d738df1be2c6b2ab 100644 (file)
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -135,16 +135,11 @@ const tokenizer = new Tokenizer(
    { decodeEntities: true },
    {
      ontext(start, end) {
-      const content = getSlice(start, end)
-      endIndex = end - 1
-      onText(content)
-      startIndex = end
+      onText(getSlice(start, end), start, end)
      },
  
      ontextentity(cp, end) {
-      endIndex = end - 1
-      onText(fromCodePoint(cp))
-      startIndex = end
+      onText(fromCodePoint(cp), end - 1, end)
      },
  
      onopentagname(start, end) {
@@ -206,7 +201,7 @@ const tokenizer = new Tokenizer(
      onattribentity(codepoint) {
        attribvalue += fromCodePoint(codepoint)
      },
-    onattribend(quote, end) {
+    onattribend(_quote, end) {
        endIndex = end
        if (attribs && !hasOwn(attribs, attribname)) {
          // TODO gen attributes AST nodes
@@ -299,7 +294,7 @@ function endOpenTag(isImplied: boolean) {
    tagname = ''
  }
  
-function onText(content: string) {
+function onText(content: string, start: number, end: number) {
    const parent = getParent()
    const lastNode = parent.children[parent.children.length - 1]
    if (lastNode?.type === NodeTypes.TEXT) {
@@ -310,8 +305,15 @@ function onText(content: string) {
      parent.children.push({
        type: NodeTypes.TEXT,
        content,
-      // @ts-ignore TODO
-      loc: {}
+      loc: {
+        start: {
+          offset: start,
+          line: tokenizer.startLine,
+          column: tokenizer.startColumn
+        },
+        end: { offset: end, line: tokenizer.line, column: tokenizer.column },
+        source: content
+      }
      })
    }
  }
@@ -327,8 +329,13 @@ function onOpenTag(tag: string) {
      // TODO props
      props: [],
      children: [],
-    // @ts-ignore TODO
-    loc: {},
+    loc: {
+      // @ts-expect-error TODO
+      start: {},
+      // @ts-expect-error TODO
+      end: { offset: endIndex },
+      source: ''
+    },
      codegenNode: undefined
    }
    addNode(el)
@@ -338,14 +345,25 @@ function onOpenTag(tag: string) {
  function onCloseTag() {
    const el = elementStack.pop()!
    // whitepsace management
-  const nodes = el.children
+  el.children = condenseWhitespace(el.children)
+}
+
+const windowsNewlineRE = /\r\n/g
+const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
+const nonWhitespaceRE = /[^\t\r\n\f ]/
+
+function isEmptyText(content: string) {
+  return !nonWhitespaceRE.test(content)
+}
+
+function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
    const shouldCondense = currentOptions.whitespace !== 'preserve'
    let removedWhitespace = false
    for (let i = 0; i < nodes.length; i++) {
      const node = nodes[i]
      if (node.type === NodeTypes.TEXT) {
        if (!inPre) {
-        if (!/[^\t\r\n\f ]/.test(node.content)) {
+        if (isEmptyText(node.content)) {
            const prev = nodes[i - 1]
            const next = nodes[i + 1]
            // Remove if:
@@ -376,19 +394,17 @@ function onCloseTag() {
          } else if (shouldCondense) {
            // in condense mode, consecutive whitespaces in text are condensed
            // down to a single space.
-          node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
+          node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
          }
        } else {
          // #6410 normalize windows newlines in <pre>:
          // in SSR, browsers normalize server-rendered \r\n into a single \n
          // in the DOM
-        node.content = node.content.replace(/\r\n/g, '\n')
+        node.content = node.content.replace(windowsNewlineRE, '\n')
        }
      }
    }
-  if (removedWhitespace) {
-    el.children = nodes.filter(Boolean)
-  }
+  return removedWhitespace ? nodes.filter(Boolean) : nodes
  }
  
  function addNode(node: TemplateChildNode) {
@@ -418,12 +434,11 @@ export function baseParse(
    options: ParserOptions = {}
  ): RootNode {
    reset()
-  currentInput = input.trim()
+  currentInput = input
    currentOptions = options
    htmlMode = !!options.htmlMode
    const root = (currentRoot = createRoot([]))
    tokenizer.parse(currentInput)
-  // temp hack for ts
-  console.log(endIndex)
+  root.children = condenseWhitespace(root.children)
    return root
  }
author	Evan You <yyx990803@gmail.com>
	Mon, 13 Nov 2023 17:14:33 +0000 (01:14 +0800)
committer	Evan You <yyx990803@gmail.com>
	Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/parser/Tokenizer.ts		patch \| blob \| blame \| history
packages/compiler-core/src/parser/index.ts		patch \| blob \| blame \| history