wip: refactor line / column generation

author Evan You <yyx990803@gmail.com>

Tue, 14 Nov 2023 08:35:52 +0000 (16:35 +0800)

committer Evan You <yyx990803@gmail.com>

Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
author Evan You <yyx990803@gmail.com>
Tue, 14 Nov 2023 08:35:52 +0000 (16:35 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts

index 4dedf9a465c4b5eee69e2b241ab01d482d7f8569..8c2269b1e446f9f67d2342afe559d6dea3a1973e 100644 (file)
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -27,6 +27,7 @@ import {
    DecodingMode,
    htmlDecodeTree
  } from 'entities/lib/decode.js'
+import { Position } from '../ast'
  
  const enum CharCodes {
    Tab = 0x9, // "\t"
@@ -120,21 +121,25 @@ export enum QuoteType {
  }
  
  export interface Callbacks {
+  ontext(start: number, endIndex: number): void
+  ontextentity(codepoint: number, endIndex: number): void
+
+  onopentagname(start: number, endIndex: number): void
+  onopentagend(endIndex: number): void
+  onselfclosingtag(endIndex: number): void
+  onclosetag(start: number, endIndex: number): void
+
    onattribdata(start: number, endIndex: number): void
    onattribentity(codepoint: number): void
    onattribend(quote: QuoteType, endIndex: number): void
    onattribname(start: number, endIndex: number): void
-  oncdata(start: number, endIndex: number, endOffset: number): void
-  onclosetag(start: number, endIndex: number): void
+
    oncomment(start: number, endIndex: number, endOffset: number): void
-  ondeclaration(start: number, endIndex: number): void
+  oncdata(start: number, endIndex: number, endOffset: number): void
+
+  // onprocessinginstruction(start: number, endIndex: number): void
+  // ondeclaration(start: number, endIndex: number): void
    onend(): void
-  onopentagend(endIndex: number): void
-  onopentagname(start: number, endIndex: number): void
-  onprocessinginstruction(start: number, endIndex: number): void
-  onselfclosingtag(endIndex: number): void
-  ontext(start: number, endIndex: number): void
-  ontextentity(codepoint: number, endIndex: number): void
  }
  
  /**
@@ -167,15 +172,12 @@ export default class Tokenizer {
    private baseState = State.Text
    /** For special parsing behavior inside of script and style tags. */
    private isSpecial = false
+  /** Reocrd newline positions for fast line / column calculation */
+  private newlines: number[] = []
  
    private readonly decodeEntities: boolean
    private readonly entityDecoder: EntityDecoder
  
-  public line = 1
-  public column = 1
-  public startLine = 1
-  public startColumn = 1
-
    constructor(
      { decodeEntities = true }: { decodeEntities?: boolean },
      private readonly cbs: Callbacks
@@ -189,20 +191,35 @@ export default class Tokenizer {
    public reset(): void {
      this.state = State.Text
      this.buffer = ''
-    this.recordStart(0)
+    this.sectionStart = 0
      this.index = 0
-    this.line = 1
-    this.column = 1
-    this.startLine = 1
-    this.startColumn = 1
      this.baseState = State.Text
      this.currentSequence = undefined!
+    this.newlines.length = 0
    }
  
-  private recordStart(start = this.index) {
-    this.sectionStart = start
-    this.startLine = this.line
-    this.startColumn = this.column + (start - this.index)
+  /**
+   * Generate Position object with line / column information using recorded
+   * newline positions. We know the index is always going to be an already
+   * processed index, so all the newlines up to this index should have been
+   * recorded.
+   */
+  public getPositionForIndex(index: number): Position {
+    let line = 1
+    let column = index + 1
+    for (let i = this.newlines.length - 1; i >= 0; i--) {
+      const newlineIndex = this.newlines[i]
+      if (index > newlineIndex) {
+        line = i + 2
+        column = index - newlineIndex
+        break
+      }
+    }
+    return {
+      offset: index,
+      line,
+      column
+    }
    }
  
    private stateText(c: number): void {
@@ -214,7 +231,7 @@ export default class Tokenizer {
          this.cbs.ontext(this.sectionStart, this.index)
        }
        this.state = State.BeforeTagName
-      this.recordStart()
+      this.sectionStart = this.index
      } else if (this.decodeEntities && c === CharCodes.Amp) {
        this.startEntity()
      }
@@ -257,7 +274,7 @@ export default class Tokenizer {
          }
  
          this.isSpecial = false
-        this.recordStart(endOfText + 2) // Skip over the `</`
+        this.sectionStart = endOfText + 2 // Skip over the `</`
          this.stateInClosingTagName(c)
          return // We are done; skip the rest of the function.
        }
@@ -289,7 +306,7 @@ export default class Tokenizer {
          this.state = State.InCommentLike
          this.currentSequence = Sequences.CdataEnd
          this.sequenceIndex = 0
-        this.recordStart(this.index + 1)
+        this.sectionStart = this.index + 1
        }
      } else {
        this.sequenceIndex = 0
@@ -340,7 +357,7 @@ export default class Tokenizer {
          }
  
          this.sequenceIndex = 0
-        this.recordStart(this.index + 1)
+        this.sectionStart = this.index + 1
          this.state = State.Text
        }
      } else if (this.sequenceIndex === 0) {
@@ -374,13 +391,13 @@ export default class Tokenizer {
    private stateBeforeTagName(c: number): void {
      if (c === CharCodes.ExclamationMark) {
        this.state = State.BeforeDeclaration
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else if (c === CharCodes.Questionmark) {
        this.state = State.InProcessingInstruction
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else if (this.isTagStartChar(c)) {
        const lower = c | 0x20
-      this.recordStart()
+      this.sectionStart = this.index
        if (lower === Sequences.TitleEnd[2]) {
          this.startSpecial(Sequences.TitleEnd, 3)
        } else {
@@ -399,7 +416,7 @@ export default class Tokenizer {
    private stateInTagName(c: number): void {
      if (isEndOfTagSection(c)) {
        this.cbs.onopentagname(this.sectionStart, this.index)
-      this.recordStart(-1)
+      this.sectionStart = -1
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
      }
@@ -413,13 +430,13 @@ export default class Tokenizer {
        this.state = this.isTagStartChar(c)
          ? State.InClosingTagName
          : State.InSpecialComment
-      this.recordStart()
+      this.sectionStart = this.index
      }
    }
    private stateInClosingTagName(c: number): void {
      if (c === CharCodes.Gt || isWhitespace(c)) {
        this.cbs.onclosetag(this.sectionStart, this.index)
-      this.recordStart(-1)
+      this.sectionStart = -1
        this.state = State.AfterClosingTagName
        this.stateAfterClosingTagName(c)
      }
@@ -428,7 +445,7 @@ export default class Tokenizer {
      // Skip everything until ">"
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.state = State.Text
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      }
    }
    private stateBeforeAttributeName(c: number): void {
@@ -440,19 +457,19 @@ export default class Tokenizer {
        } else {
          this.state = State.Text
        }
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else if (c === CharCodes.Slash) {
        this.state = State.InSelfClosingTag
      } else if (!isWhitespace(c)) {
        this.state = State.InAttributeName
-      this.recordStart()
+      this.sectionStart = this.index
      }
    }
    private stateInSelfClosingTag(c: number): void {
      if (c === CharCodes.Gt) {
        this.cbs.onselfclosingtag(this.index)
        this.state = State.Text
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
        this.isSpecial = false // Reset special state, in case of self-closing special tags
      } else if (!isWhitespace(c)) {
        this.state = State.BeforeAttributeName
@@ -462,7 +479,7 @@ export default class Tokenizer {
    private stateInAttributeName(c: number): void {
      if (c === CharCodes.Eq || isEndOfTagSection(c)) {
        this.cbs.onattribname(this.sectionStart, this.index)
-      this.recordStart()
+      this.sectionStart = this.index
        this.state = State.AfterAttributeName
        this.stateAfterAttributeName(c)
      }
@@ -472,24 +489,24 @@ export default class Tokenizer {
        this.state = State.BeforeAttributeValue
      } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
        this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.recordStart(-1)
+      this.sectionStart = -1
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
      } else if (!isWhitespace(c)) {
        this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
        this.state = State.InAttributeName
-      this.recordStart()
+      this.sectionStart = this.index
      }
    }
    private stateBeforeAttributeValue(c: number): void {
      if (c === CharCodes.DoubleQuote) {
        this.state = State.InAttributeValueDq
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else if (c === CharCodes.SingleQuote) {
        this.state = State.InAttributeValueSq
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else if (!isWhitespace(c)) {
-      this.recordStart()
+      this.sectionStart = this.index
        this.state = State.InAttributeValueNq
        this.stateInAttributeValueNoQuotes(c) // Reconsume token
      }
@@ -497,7 +514,7 @@ export default class Tokenizer {
    private handleInAttributeValue(c: number, quote: number) {
      if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
        this.cbs.onattribdata(this.sectionStart, this.index)
-      this.recordStart(-1)
+      this.sectionStart = -1
        this.cbs.onattribend(
          quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
          this.index + 1
@@ -516,7 +533,7 @@ export default class Tokenizer {
    private stateInAttributeValueNoQuotes(c: number): void {
      if (isWhitespace(c) || c === CharCodes.Gt) {
        this.cbs.onattribdata(this.sectionStart, this.index)
-      this.recordStart(-1)
+      this.sectionStart = -1
        this.cbs.onattribend(QuoteType.Unquoted, this.index)
        this.state = State.BeforeAttributeName
        this.stateBeforeAttributeName(c)
@@ -535,16 +552,16 @@ export default class Tokenizer {
    }
    private stateInDeclaration(c: number): void {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
-      this.cbs.ondeclaration(this.sectionStart, this.index)
+      // this.cbs.ondeclaration(this.sectionStart, this.index)
        this.state = State.Text
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      }
    }
    private stateInProcessingInstruction(c: number): void {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
-      this.cbs.onprocessinginstruction(this.sectionStart, this.index)
+      // this.cbs.onprocessinginstruction(this.sectionStart, this.index)
        this.state = State.Text
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      }
    }
    private stateBeforeComment(c: number): void {
@@ -553,7 +570,7 @@ export default class Tokenizer {
        this.currentSequence = Sequences.CommentEnd
        // Allow short comments (eg. <!-->)
        this.sequenceIndex = 2
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      } else {
        this.state = State.InDeclaration
      }
@@ -562,7 +579,7 @@ export default class Tokenizer {
      if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
        this.cbs.oncomment(this.sectionStart, this.index, 0)
        this.state = State.Text
-      this.recordStart(this.index + 1)
+      this.sectionStart = this.index + 1
      }
    }
    private stateBeforeSpecialS(c: number): void {
@@ -715,14 +732,10 @@ export default class Tokenizer {
            break
          }
        }
-      this.index++
-      // line / column handling
        if (c === CharCodes.NewLine) {
-        this.line++
-        this.column = 1
-      } else {
-        this.column++
+        this.newlines.push(this.index)
        }
+      this.index++
      }
      this.cleanup()
      this.finish()
@@ -739,14 +752,14 @@ export default class Tokenizer {
          (this.state === State.InSpecialTag && this.sequenceIndex === 0)
        ) {
          this.cbs.ontext(this.sectionStart, this.index)
-        this.recordStart()
+        this.sectionStart = this.index
        } else if (
          this.state === State.InAttributeValueDq ||
          this.state === State.InAttributeValueSq ||
          this.state === State.InAttributeValueNq
        ) {
          this.cbs.onattribdata(this.sectionStart, this.index)
-        this.recordStart()
+        this.sectionStart = this.index
        }
      }
    }
@@ -805,7 +818,7 @@ export default class Tokenizer {
        if (this.sectionStart < this.entityStart) {
          this.cbs.onattribdata(this.sectionStart, this.entityStart)
        }
-      this.recordStart(this.entityStart + consumed)
+      this.sectionStart = this.entityStart + consumed
        this.index = this.sectionStart - 1
  
        this.cbs.onattribentity(cp)
@@ -813,7 +826,7 @@ export default class Tokenizer {
        if (this.sectionStart < this.entityStart) {
          this.cbs.ontext(this.sectionStart, this.entityStart)
        }
-      this.recordStart(this.entityStart + consumed)
+      this.sectionStart = this.entityStart + consumed
        this.index = this.sectionStart - 1
  
        this.cbs.ontextentity(cp, this.sectionStart)
diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts

index 2c7cf540d07d4370b7e8ca41d738df1be2c6b2ab..88c0b5576d592a19a37f1c736bc0cb89bb4ff18f 100644 (file)
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -228,20 +228,6 @@ const tokenizer = new Tokenizer(
        endIndex = end
        // TODO throw error
        startIndex = end + 1
-    },
-
-    // TODO ignore
-    ondeclaration(start, end) {
-      endIndex = end
-      // TODO onprocessinginstruction
-      startIndex = end + 1
-    },
-
-    // TODO ignore
-    onprocessinginstruction(start, end) {
-      endIndex = end
-      // TODO onprocessinginstruction
-      startIndex = end + 1
      }
    }
  )
@@ -306,12 +292,8 @@ function onText(content: string, start: number, end: number) {
        type: NodeTypes.TEXT,
        content,
        loc: {
-        start: {
-          offset: start,
-          line: tokenizer.startLine,
-          column: tokenizer.startColumn
-        },
-        end: { offset: end, line: tokenizer.line, column: tokenizer.column },
+        start: tokenizer.getPositionForIndex(start),
+        end: tokenizer.getPositionForIndex(end),
          source: content
        }
      })
author	Evan You <yyx990803@gmail.com>
	Tue, 14 Nov 2023 08:35:52 +0000 (16:35 +0800)
committer	Evan You <yyx990803@gmail.com>
	Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/parser/Tokenizer.ts		patch \| blob \| blame \| history
packages/compiler-core/src/parser/index.ts		patch \| blob \| blame \| history