git.ipfire.org Git - thirdparty/vuejs/core.git/commitdiff
wip: parse directive in tokenizer
author Evan You <yyx990803@gmail.com>
Tue, 14 Nov 2023 17:14:36 +0000 (01:14 +0800)
committer Evan You <yyx990803@gmail.com>
Sat, 25 Nov 2023 08:18:29 +0000 (16:18 +0800)
packages/compiler-core/src/parser/Parser.ts [deleted file]
packages/compiler-core/src/parser/Tokenizer.ts
packages/compiler-core/src/parser/index.ts

diff --git a/packages/compiler-core/src/parser/Parser.ts b/packages/compiler-core/src/parser/Parser.ts
deleted file mode 100644 (file)
index be6ff0b..0000000
+++ /dev/null
@@ -1,446 +0,0 @@
-import Tokenizer, { Callbacks, QuoteType } from './Tokenizer.js'
-import { fromCodePoint } from 'entities/lib/decode.js'
-
-const formTags = new Set([
-  'input',
-  'option',
-  'optgroup',
-  'select',
-  'button',
-  'datalist',
-  'textarea'
-])
-const pTag = new Set(['p'])
-const tableSectionTags = new Set(['thead', 'tbody'])
-const ddtTags = new Set(['dd', 'dt'])
-const rtpTags = new Set(['rt', 'rp'])
-
-const openImpliesClose = new Map<string, Set<string>>([
-  ['tr', new Set(['tr', 'th', 'td'])],
-  ['th', new Set(['th'])],
-  ['td', new Set(['thead', 'th', 'td'])],
-  ['body', new Set(['head', 'link', 'script'])],
-  ['li', new Set(['li'])],
-  ['p', pTag],
-  ['h1', pTag],
-  ['h2', pTag],
-  ['h3', pTag],
-  ['h4', pTag],
-  ['h5', pTag],
-  ['h6', pTag],
-  ['select', formTags],
-  ['input', formTags],
-  ['output', formTags],
-  ['button', formTags],
-  ['datalist', formTags],
-  ['textarea', formTags],
-  ['option', new Set(['option'])],
-  ['optgroup', new Set(['optgroup', 'option'])],
-  ['dd', ddtTags],
-  ['dt', ddtTags],
-  ['address', pTag],
-  ['article', pTag],
-  ['aside', pTag],
-  ['blockquote', pTag],
-  ['details', pTag],
-  ['div', pTag],
-  ['dl', pTag],
-  ['fieldset', pTag],
-  ['figcaption', pTag],
-  ['figure', pTag],
-  ['footer', pTag],
-  ['form', pTag],
-  ['header', pTag],
-  ['hr', pTag],
-  ['main', pTag],
-  ['nav', pTag],
-  ['ol', pTag],
-  ['pre', pTag],
-  ['section', pTag],
-  ['table', pTag],
-  ['ul', pTag],
-  ['rt', rtpTags],
-  ['rp', rtpTags],
-  ['tbody', tableSectionTags],
-  ['tfoot', tableSectionTags]
-])
-
-const voidElements = new Set([
-  'area',
-  'base',
-  'basefont',
-  'br',
-  'col',
-  'command',
-  'embed',
-  'frame',
-  'hr',
-  'img',
-  'input',
-  'isindex',
-  'keygen',
-  'link',
-  'meta',
-  'param',
-  'source',
-  'track',
-  'wbr'
-])
-
-const foreignContextElements = new Set(['math', 'svg'])
-
-const htmlIntegrationElements = new Set([
-  'mi',
-  'mo',
-  'mn',
-  'ms',
-  'mtext',
-  'annotation-xml',
-  'foreignobject',
-  'desc',
-  'title'
-])
-
-export interface ParserOptions {
-  /**
-   * Decode entities within the document.
-   *
-   * @default true
-   */
-  decodeEntities?: boolean
-}
-
-export interface Handler {
-  onparserinit(parser: Parser): void
-
-  /**
-   * Resets the handler back to starting state
-   */
-  onreset(): void
-
-  /**
-   * Signals the handler that parsing is done
-   */
-  onend(): void
-  onerror(error: Error): void
-  onclosetag(name: string, isImplied: boolean): void
-  onopentagname(name: string): void
-  /**
-   *
-   * @param name Name of the attribute
-   * @param value Value of the attribute.
-   * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value.
-   */
-  onattribute(
-    name: string,
-    value: string,
-    quote?: string | undefined | null
-  ): void
-  onopentag(
-    name: string,
-    attribs: { [s: string]: string },
-    isImplied: boolean
-  ): void
-  ontext(data: string): void
-  oncomment(data: string): void
-  oncdatastart(): void
-  oncdataend(): void
-  oncommentend(): void
-  onprocessinginstruction(name: string, data: string): void
-}
-
-const reNameEnd = /\s|\//
-
-export class Parser implements Callbacks {
-  /** The start index of the last event. */
-  public startIndex = 0
-  /** The end index of the last event. */
-  public endIndex = 0
-  /**
-   * Store the start index of the current open tag,
-   * so we can update the start index for attributes.
-   */
-  private openTagStart = 0
-
-  private tagname = ''
-  private attribname = ''
-  private attribvalue = ''
-  private attribs: null | { [key: string]: string } = null
-  private readonly stack: string[] = []
-  /** Determines whether self-closing tags are recognized. */
-  private readonly foreignContext: boolean[]
-  private readonly cbs: Partial<Handler>
-  private readonly tokenizer: Tokenizer
-
-  private buffer: string = ''
-
-  constructor(
-    cbs?: Partial<Handler> | null,
-    private readonly options: ParserOptions = {}
-  ) {
-    this.cbs = cbs ?? {}
-    this.tokenizer = new Tokenizer(this.options, this)
-    this.foreignContext = [false]
-    this.cbs.onparserinit?.(this)
-  }
-
-  // Tokenizer event handlers
-
-  /** @internal */
-  ontext(start: number, endIndex: number): void {
-    const data = this.getSlice(start, endIndex)
-    this.endIndex = endIndex - 1
-    this.cbs.ontext?.(data)
-    this.startIndex = endIndex
-  }
-
-  /** @internal */
-  ontextentity(cp: number, endIndex: number): void {
-    this.endIndex = endIndex - 1
-    this.cbs.ontext?.(fromCodePoint(cp))
-    this.startIndex = endIndex
-  }
-
-  /** @internal */
-  onopentagname(start: number, endIndex: number): void {
-    this.emitOpenTag(this.getSlice(start, (this.endIndex = endIndex)))
-  }
-
-  private emitOpenTag(name: string) {
-    this.openTagStart = this.startIndex
-    this.tagname = name
-
-    const impliesClose = openImpliesClose.get(name)
-
-    if (impliesClose) {
-      while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
-        const element = this.stack.shift()!
-        this.cbs.onclosetag?.(element, true)
-      }
-    }
-    if (!voidElements.has(name)) {
-      this.stack.unshift(name)
-
-      if (foreignContextElements.has(name)) {
-        this.foreignContext.unshift(true)
-      } else if (htmlIntegrationElements.has(name)) {
-        this.foreignContext.unshift(false)
-      }
-    }
-    this.cbs.onopentagname?.(name)
-    if (this.cbs.onopentag) this.attribs = {}
-  }
-
-  private endOpenTag(isImplied: boolean) {
-    this.startIndex = this.openTagStart
-
-    if (this.attribs) {
-      this.cbs.onopentag?.(this.tagname, this.attribs, isImplied)
-      this.attribs = null
-    }
-    if (this.cbs.onclosetag && voidElements.has(this.tagname)) {
-      this.cbs.onclosetag(this.tagname, true)
-    }
-
-    this.tagname = ''
-  }
-
-  /** @internal */
-  onopentagend(endIndex: number): void {
-    this.endIndex = endIndex
-    this.endOpenTag(false)
-
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  onclosetag(start: number, endIndex: number): void {
-    const name = this.getSlice(start, (this.endIndex = endIndex))
-
-    if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) {
-      this.foreignContext.shift()
-    }
-
-    if (!voidElements.has(name)) {
-      const pos = this.stack.indexOf(name)
-      if (pos !== -1) {
-        for (let index = 0; index <= pos; index++) {
-          const element = this.stack.shift()!
-          // We know the stack has sufficient elements.
-          this.cbs.onclosetag?.(element, index !== pos)
-        }
-      } else if (name === 'p') {
-        // Implicit open before close
-        this.emitOpenTag('p')
-        this.closeCurrentTag(true)
-      }
-    } else if (name === 'br') {
-      // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
-      this.cbs.onopentagname?.('br')
-      this.cbs.onopentag?.('br', {}, true)
-      this.cbs.onclosetag?.('br', false)
-    }
-
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  onselfclosingtag(endIndex: number): void {
-    this.endIndex = endIndex
-    this.closeCurrentTag(false)
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  private closeCurrentTag(isOpenImplied: boolean) {
-    const name = this.tagname
-    this.endOpenTag(isOpenImplied)
-
-    // Self-closing tags will be on the top of the stack
-    if (this.stack[0] === name) {
-      // If the opening tag isn't implied, the closing tag has to be implied.
-      this.cbs.onclosetag?.(name, !isOpenImplied)
-      this.stack.shift()
-    }
-  }
-
-  /** @internal */
-  onattribname(start: number, endIndex: number): void {
-    this.attribname = this.getSlice((this.startIndex = start), endIndex)
-  }
-
-  /** @internal */
-  onattribdata(start: number, endIndex: number): void {
-    this.attribvalue += this.getSlice(start, endIndex)
-  }
-
-  /** @internal */
-  onattribentity(cp: number): void {
-    this.attribvalue += fromCodePoint(cp)
-  }
-
-  /** @internal */
-  onattribend(quote: QuoteType, endIndex: number): void {
-    this.endIndex = endIndex
-
-    this.cbs.onattribute?.(
-      this.attribname,
-      this.attribvalue,
-      quote === QuoteType.Double
-        ? '"'
-        : quote === QuoteType.Single
-        ? "'"
-        : quote === QuoteType.NoValue
-        ? undefined
-        : null
-    )
-
-    if (
-      this.attribs &&
-      !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)
-    ) {
-      this.attribs[this.attribname] = this.attribvalue
-    }
-    this.attribvalue = ''
-  }
-
-  private getInstructionName(value: string) {
-    const index = value.search(reNameEnd)
-    return index < 0 ? value : value.slice(0, index)
-  }
-
-  /** @internal */
-  ondeclaration(start: number, endIndex: number): void {
-    this.endIndex = endIndex
-    const value = this.getSlice(start, endIndex)
-
-    if (this.cbs.onprocessinginstruction) {
-      const name = this.getInstructionName(value)
-      this.cbs.onprocessinginstruction(`!${name}`, `!${value}`)
-    }
-
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  onprocessinginstruction(start: number, endIndex: number): void {
-    this.endIndex = endIndex
-    const value = this.getSlice(start, endIndex)
-
-    if (this.cbs.onprocessinginstruction) {
-      const name = this.getInstructionName(value)
-      this.cbs.onprocessinginstruction(`?${name}`, `?${value}`)
-    }
-
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  oncomment(start: number, endIndex: number, offset: number): void {
-    this.endIndex = endIndex
-
-    this.cbs.oncomment?.(this.getSlice(start, endIndex - offset))
-    this.cbs.oncommentend?.()
-
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  oncdata(start: number, endIndex: number, offset: number): void {
-    this.endIndex = endIndex
-    this.cbs.oncdatastart?.()
-    this.cbs.ontext?.(this.getSlice(start, endIndex - offset))
-    this.cbs.oncdataend?.()
-    // Set `startIndex` for next node
-    this.startIndex = endIndex + 1
-  }
-
-  /** @internal */
-  onend(): void {
-    if (this.cbs.onclosetag) {
-      // Set the end index for all remaining tags
-      this.endIndex = this.startIndex
-      for (let index = 0; index < this.stack.length; index++) {
-        this.cbs.onclosetag(this.stack[index], true)
-      }
-    }
-    this.cbs.onend?.()
-  }
-
-  private getSlice(start: number, end: number) {
-    return this.buffer.slice(start, end)
-  }
-
-  /**
-   * Parses a chunk of data and calls the corresponding callbacks.
-   *
-   * @param input string to parse.
-   */
-  public parse(input: string): void {
-    this.reset()
-    this.buffer = input
-    this.tokenizer.parse(input)
-  }
-
-  /**
-   * Resets the parser to a blank state, ready to parse a new HTML document
-   */
-  public reset(): void {
-    this.cbs.onreset?.()
-    this.tokenizer.reset()
-    this.tagname = ''
-    this.attribname = ''
-    this.attribs = null
-    this.stack.length = 0
-    this.startIndex = 0
-    this.endIndex = 0
-    this.cbs.onparserinit?.(this)
-    this.foreignContext.length = 0
-    this.foreignContext.unshift(false)
-  }
-}
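diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts
index 05bf9eea350dfa3b5c49968a7daed0764579dac0..f8dd287bb91fb2516fef8dc618fb45890299893a 100644 (file)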
@@ -56,7 +56,13 @@ export const enum CharCodes {
   UpperZ = 0x5a, // "Z"
   LowerZ = 0x7a, // "z"
   LowerX = 0x78, // "x"
-  OpeningSquareBracket = 0x5b // "["
+  OpeningSquareBracket = 0x5b, // "["
+  LowerV = 0x76, // "v"
+  Dot = 0x2e, // "."
+  Colon = 0x3a, // ":"
+  At = 0x40, // "@"
+  LeftSqaure = 91, // "["
+  RightSquare = 93 // "]"
 }
 
 /** All the states the tokenizer can be in. */
@@ -72,6 +78,10 @@ const enum State {
   // Attributes
   BeforeAttributeName,
   InAttributeName,
+  InDirectiveName,
+  InDirectiveArg,
+  InDirectiveDynamicArg,
+  InDirectiveModifier,
   AfterAttributeName,
   BeforeAttributeValue,
   InAttributeValueDq, // "
@@ -134,6 +144,10 @@ export interface Callbacks {
   onattribend(quote: QuoteType, endIndex: number): void
   onattribname(start: number, endIndex: number): void
 
+  ondirname(start: number, endIndex: number): void
+  ondirarg(start: number, endIndex: number): void
+  ondirmodifier(start: number, endIndex: number): void
+
   oncomment(start: number, endIndex: number, endOffset: number): void
   oncdata(start: number, endIndex: number, endOffset: number): void
 
@@ -461,6 +475,26 @@ export default class Tokenizer {
     } else if (c === CharCodes.Slash) {
       this.state = State.InSelfClosingTag
     } else if (!isWhitespace(c)) {
+      this.enterAttribute(c)
+    }
+  }
+  private enterAttribute(c: number) {
+    if (
+      c === CharCodes.LowerV &&
+      this.buffer.charCodeAt(this.index + 1) === CharCodes.Dash
+    ) {
+      this.state = State.InDirectiveName
+      this.sectionStart = this.index
+    } else if (
+      c === CharCodes.Dot ||
+      c === CharCodes.Colon ||
+      c === CharCodes.At ||
+      c === CharCodes.Number
+    ) {
+      this.cbs.ondirname(this.index, this.index + 1)
+      this.state = State.InDirectiveArg
+      this.sectionStart = this.index + 1
+    } else {
       this.state = State.InAttributeName
       this.sectionStart = this.index
     }
@@ -484,6 +518,54 @@ export default class Tokenizer {
       this.stateAfterAttributeName(c)
     }
   }
+  private stateInDirectiveName(c: number): void {
+    if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+      this.cbs.ondirname(this.sectionStart, this.index)
+      this.sectionStart = this.index
+      this.state = State.AfterAttributeName
+      this.stateAfterAttributeName(c)
+    } else if (c === CharCodes.Colon) {
+      this.cbs.ondirname(this.sectionStart, this.index)
+      this.state = State.InDirectiveArg
+      this.sectionStart = this.index + 1
+    } else if (c === CharCodes.Dot) {
+      this.cbs.ondirname(this.sectionStart, this.index)
+      this.state = State.InDirectiveModifier
+      this.sectionStart = this.index + 1
+    }
+  }
+  private stateInDirectiveArg(c: number): void {
+    if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+      this.cbs.ondirarg(this.sectionStart, this.index)
+      this.sectionStart = this.index
+      this.state = State.AfterAttributeName
+      this.stateAfterAttributeName(c)
+    } else if (c === CharCodes.LeftSqaure) {
+      this.state = State.InDirectiveDynamicArg
+    } else if (c === CharCodes.Dot) {
+      this.cbs.ondirarg(this.sectionStart, this.index)
+      this.state = State.InDirectiveModifier
+      this.sectionStart = this.index + 1
+    }
+  }
+  private stateInDynamicDirectiveArg(c: number): void {
+    if (c === CharCodes.RightSquare) {
+      this.state = State.InDirectiveArg
+    } else if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+      // TODO emit error
+    }
+  }
+  private stateInDirectiveModifier(c: number): void {
+    if (c === CharCodes.Eq || isEndOfTagSection(c)) {
+      this.cbs.ondirmodifier(this.sectionStart, this.index)
+      this.sectionStart = this.index
+      this.state = State.AfterAttributeName
+      this.stateAfterAttributeName(c)
+    } else if (c === CharCodes.Dot) {
+      this.cbs.ondirmodifier(this.sectionStart, this.index)
+      this.sectionStart = this.index + 1
+    }
+  }
   private stateAfterAttributeName(c: number): void {
     if (c === CharCodes.Eq) {
       this.state = State.BeforeAttributeValue
@@ -494,8 +576,7 @@ export default class Tokenizer {
       this.stateBeforeAttributeName(c)
     } else if (!isWhitespace(c)) {
       this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.enterAttribute(c)
     }
   }
   private stateBeforeAttributeValue(c: number): void {
@@ -655,6 +736,22 @@ export default class Tokenizer {
           this.stateInAttributeName(c)
           break
         }
+        case State.InDirectiveName: {
+          this.stateInDirectiveName(c)
+          break
+        }
+        case State.InDirectiveArg: {
+          this.stateInDirectiveArg(c)
+          break
+        }
+        case State.InDirectiveDynamicArg: {
+          this.stateInDynamicDirectiveArg(c)
+          break
+        }
+        case State.InDirectiveModifier: {
+          this.stateInDirectiveModifier(c)
+          break
+        }
         case State.InCommentLike: {
           this.stateInCommentLike(c)
           break
@@ -796,6 +893,10 @@ export default class Tokenizer {
       this.state === State.BeforeAttributeValue ||
       this.state === State.AfterAttributeName ||
       this.state === State.InAttributeName ||
+      this.state === State.InDirectiveName ||
+      this.state === State.InDirectiveArg ||
+      this.state === State.InDirectiveDynamicArg ||
+      this.state === State.InDirectiveModifier ||
       this.state === State.InAttributeValueSq ||
       this.state === State.InAttributeValueDq ||
       this.state === State.InAttributeValueNq ||
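diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts
index 2e6606086326edd7ac810b5be7d49a8e12023258..450ae2263a3ff41e7483bd0f78dd8293676a22ca 100644 (file)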
@@ -142,11 +142,6 @@ const tokenizer = new Tokenizer(
 
     onattribname(start, end) {
       const name = getSlice(start, end)
-      if (currentAttrs.has(name)) {
-        // TODO emit error DUPLICATE_ATTRIBUTE
-      } else {
-        currentAttrs.add(name)
-      }
       if (!inVPre && isDirective(name)) {
         // directive
         const match = directiveParseRE.exec(name)!
@@ -259,42 +254,59 @@ const tokenizer = new Tokenizer(
       currentAttrValue += fromCodePoint(codepoint)
     },
     onattribend(_quote, end) {
-      if (currentElement) {
-        if (currentAttrValue) {
-          if (currentProp!.type === NodeTypes.ATTRIBUTE) {
-            // assign value
-            currentProp!.value = {
-              type: NodeTypes.TEXT,
-              content: currentAttrValue,
-              // @ts-expect-error TODO
-              loc: {}
-            }
-          } else {
-            // directive
-            currentProp!.exp = {
-              type: NodeTypes.SIMPLE_EXPRESSION,
-              content: currentAttrValue,
-              isStatic: false,
-              // Treat as non-constant by default. This can be potentially set to
-              // other values by `transformExpression` to make it eligible for hoisting.
-              constType: ConstantTypes.NOT_CONSTANT,
-              // @ts-expect-error TODO
-              loc: {}
-            }
-          }
-        }
-        currentProp!.loc.end = tokenizer.getPositionForIndex(end)
-        currentElement.props.push(currentProp!)
-      }
+      // TODO check duplicate
+      // if (currentAttrs.has(name)) {
+      //   // emit error DUPLICATE_ATTRIBUTE
+      // } else {
+      //   currentAttrs.add(name)
+      // }
+      // if (currentElement) {
+      //   if (currentAttrValue) {
+      //     if (currentProp!.type === NodeTypes.ATTRIBUTE) {
+      //       // assign value
+      //       currentProp!.value = {
+      //         type: NodeTypes.TEXT,
+      //         content: currentAttrValue,
+      //         // @ts-expect-error TODO
+      //         loc: {}
+      //       }
+      //     } else {
+      //       // directive
+      //       currentProp!.exp = {
+      //         type: NodeTypes.SIMPLE_EXPRESSION,
+      //         content: currentAttrValue,
+      //         isStatic: false,
+      //         // Treat as non-constant by default. This can be potentially set to
+      //         // other values by `transformExpression` to make it eligible for hoisting.
+      //         constType: ConstantTypes.NOT_CONSTANT,
+      //         // @ts-expect-error TODO
+      //         loc: {}
+      //       }
+      //     }
+      //   }
+      //   currentProp!.loc.end = tokenizer.getPositionForIndex(end)
+      //   currentElement.props.push(currentProp!)
+      // }
       currentAttrValue = ''
     },
 
+    ondirname(start, end) {
+      // console.log('name ' + getSlice(start, end))
+      currentProp
+    },
+    ondirarg(start, end) {
+      // console.log('arg ' + getSlice(start, end))
+    },
+    ondirmodifier(start, end) {
+      // console.log('.' + getSlice(start, end))
+    },
+
     oncomment(start, end, offset) {
       // TODO oncomment
     },
 
     onend() {
-      const end = currentInput.length
+      const end = currentInput.length - 1
       for (let index = 0; index < stack.length; index++) {
         onCloseTag(stack[index], end)
       }