From: Evan You Date: Fri, 17 Nov 2023 01:22:12 +0000 (+0800) Subject: wip: parse mode X-Git-Tag: v3.4.0-alpha.2~45 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=65b44045ef07409a587cc1dc155f7d1d8deabd42;p=thirdparty%2Fvuejs%2Fcore.git wip: parse mode --- diff --git a/packages/compiler-core/src/index.ts b/packages/compiler-core/src/index.ts index 5ce3134fc1..c88844a5c0 100644 --- a/packages/compiler-core/src/index.ts +++ b/packages/compiler-core/src/index.ts @@ -71,4 +71,4 @@ export { CompilerDeprecationTypes } from './compat/compatConfig' -// export { baseParse as newParse } from './parser/index' +export { baseParse as newParse } from './parser/index' diff --git a/packages/compiler-core/src/options.ts b/packages/compiler-core/src/options.ts index abfba98e35..3ab42625ea 100644 --- a/packages/compiler-core/src/options.ts +++ b/packages/compiler-core/src/options.ts @@ -17,6 +17,7 @@ export interface ErrorHandlingOptions { export interface ParserOptions extends ErrorHandlingOptions, CompilerCompatOptions { + parseMode?: 'base' | 'html' | 'sfc' /** * e.g. platform native elements, e.g. `
<div>` for browsers */ diff --git a/packages/compiler-core/src/parse.ts b/packages/compiler-core/src/parse.ts index c237239db9..d9cdb4e59a 100644 --- a/packages/compiler-core/src/parse.ts +++ b/packages/compiler-core/src/parse.ts @@ -40,6 +40,7 @@ import { } from './compat/compatConfig' type OptionalOptions = + | 'parseMode' | 'whitespace' | 'isNativeTag' | 'isBuiltInComponent' diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts index 20cb14340f..96b63ead33 100644 --- a/packages/compiler-core/src/parser/Tokenizer.ts +++ b/packages/compiler-core/src/parser/Tokenizer.ts @@ -27,7 +27,7 @@ import { DecodingMode, htmlDecodeTree } from 'entities/lib/decode.js' -import { Position } from '../ast' +import { ElementNode, Position } from '../ast' export const enum ParseMode { BASE, @@ -119,7 +119,9 @@ const enum State { SpecialStartSequence, InSpecialTag, - InEntity + InEntity, + + InSFCRootTagName } /** @@ -147,6 +149,14 @@ function isEndOfTagSection(c: number): boolean { return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c) } +export function toCharCodes(str: string): Uint8Array { + const ret = new Uint8Array(str.length) + for (let i = 0; i < str.length; i++) { + ret[i] = str.charCodeAt(i) + } + return ret +} + export enum QuoteType { NoValue = 0, Unquoted = 1, @@ -221,14 +231,20 @@ export default class Tokenizer { private readonly entityDecoder: EntityDecoder - constructor(private readonly cbs: Callbacks) { + constructor( + private readonly stack: ElementNode[], + private readonly cbs: Callbacks + ) { this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => this.emitCodePoint(cp, consumed) ) } + public mode = ParseMode.BASE + public reset(): void { this.state = State.Text + this.mode = ParseMode.BASE this.buffer = '' this.sectionStart = 0 this.index = 0 @@ -329,7 +345,7 @@ export default class Tokenizer { this.stateInTagName(c) } - /** Look for an end tag. For <title> tags, also decode entities.
*/ + /** Look for an end tag. For <title> and <textarea>, also decode entities. */ private stateInSpecialTag(c: number): void { if (this.sequenceIndex === this.currentSequence.length) { if (c === CharCodes.Gt || isWhitespace(c)) { @@ -357,7 +373,8 @@ export default class Tokenizer { } else if (this.sequenceIndex === 0) { if ( this.currentSequence === Sequences.TitleEnd || - this.currentSequence === Sequences.TextareaEnd + (this.currentSequence === Sequences.TextareaEnd && + !(this.mode === ParseMode.SFC && this.stack.length === 0)) ) { // We have to parse entities in <title> and <textarea> tags. if (c === CharCodes.Amp) { @@ -459,15 +476,26 @@ export default class Tokenizer { this.state = State.InProcessingInstruction this.sectionStart = this.index + 1 } else if (isTagStartChar(c)) { - const lower = c | 0x20 this.sectionStart = this.index - if (lower === Sequences.TitleEnd[2]) { - this.state = State.BeforeSpecialT + if (this.mode === ParseMode.BASE) { + // no special tags in base mode + this.state = State.InTagName + } else if (this.mode === ParseMode.SFC && this.stack.length === 0) { + // SFC mode + root level + // - everything except <template> is RAWTEXT + // - <template> with lang other than html is also RAWTEXT + this.state = State.InSFCRootTagName } else { - this.state = - lower === Sequences.ScriptEnd[2] - ? State.BeforeSpecialS - : State.InTagName + // HTML mode + // - <script>, <style> RAWTEXT + // - <title>, <textarea> RCDATA + const lower = c | 0x20 + if (lower === 116 /* t */) { + this.state = State.BeforeSpecialT + } else { + this.state = + lower === 115 /* s */ ? 
State.BeforeSpecialS : State.InTagName + } } } else if (c === CharCodes.Slash) { this.state = State.BeforeClosingTagName @@ -478,12 +506,25 @@ export default class Tokenizer { } private stateInTagName(c: number): void { if (isEndOfTagSection(c)) { - this.cbs.onopentagname(this.sectionStart, this.index) - this.sectionStart = -1 - this.state = State.BeforeAttributeName - this.stateBeforeAttributeName(c) + this.handleTagName(c) + } + } + private stateInSFCRootTagName(c: number): void { + if (isEndOfTagSection(c)) { + const tag = this.buffer.slice(this.sectionStart, this.index) + if (tag !== 'template') { + this.isSpecial = true + this.currentSequence = toCharCodes(`</` + tag) + } + this.handleTagName(c) } } + private handleTagName(c: number) { + this.cbs.onopentagname(this.sectionStart, this.index) + this.sectionStart = -1 + this.state = State.BeforeAttributeName + this.stateBeforeAttributeName(c) + } private stateBeforeClosingTagName(c: number): void { if (isWhitespace(c)) { // Ignore @@ -830,6 +871,10 @@ export default class Tokenizer { this.stateInTagName(c) break } + case State.InSFCRootTagName: { + this.stateInSFCRootTagName(c) + break + } case State.InClosingTagName: { this.stateInClosingTagName(c) break diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts index f6df84a265..7447f53504 100644 --- a/packages/compiler-core/src/parser/index.ts +++ b/packages/compiler-core/src/parser/index.ts @@ -14,17 +14,23 @@ import { createRoot } from '../ast' import { ParserOptions } from '../options' -import Tokenizer, { CharCodes, QuoteType, isWhitespace } from './Tokenizer' +import Tokenizer, { + CharCodes, + ParseMode, + QuoteType, + isWhitespace, + toCharCodes +} from './Tokenizer' import { CompilerCompatOptions } from '../compat/compatConfig' import { NO, extend } from '@vue/shared' import { defaultOnError, defaultOnWarn } from '../errors' import { isCoreComponent } from '../utils' -import { TextModes } from '../parse' type 
OptionalOptions = | 'whitespace' | 'isNativeTag' | 'isBuiltInComponent' + | 'getTextMode' | keyof CompilerCompatOptions type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> & @@ -43,9 +49,9 @@ const decodeMap: Record<string, string> = { } export const defaultParserOptions: MergedParserOptions = { + parseMode: 'base', delimiters: [`{{`, `}}`], getNamespace: () => Namespaces.HTML, - getTextMode: () => TextModes.DATA, isVoidTag: NO, isPreTag: NO, isCustomElement: NO, @@ -73,7 +79,7 @@ let inVPre = false let currentElementIsVPreBoundary = false const stack: ElementNode[] = [] -const tokenizer = new Tokenizer({ +const tokenizer = new Tokenizer(stack, { ontext(start, end) { onText(getSlice(start, end), start, end) }, @@ -598,23 +604,24 @@ function reset() { stack.length = 0 } -function toCharCodes(str: string): Uint8Array { - const ret = new Uint8Array() - for (let i = 0; i < str.length; i++) { - ret[i] = str.charCodeAt(i) - } - return ret -} - export function baseParse(input: string, options?: ParserOptions): RootNode { reset() + currentInput = input + currentOptions = extend({}, defaultParserOptions, options) + + tokenizer.mode = + currentOptions.parseMode === 'html' + ? ParseMode.HTML + : currentOptions.parseMode === 'sfc' + ? 
ParseMode.SFC + : ParseMode.BASE + const delimiters = options?.delimiters if (delimiters) { tokenizer.delimiterOpen = toCharCodes(delimiters[0]) tokenizer.delimiterClose = toCharCodes(delimiters[1]) } - currentInput = input - currentOptions = extend({}, defaultParserOptions, options) + const root = (currentRoot = createRoot([])) tokenizer.parse(currentInput) root.loc.end = tokenizer.getPos(input.length) diff --git a/packages/vue/src/index.ts b/packages/vue/src/index.ts index 8215be7476..c00103bfee 100644 --- a/packages/vue/src/index.ts +++ b/packages/vue/src/index.ts @@ -91,3 +91,5 @@ registerRuntimeCompiler(compileToFunction) export { compileToFunction as compile } export * from '@vue/runtime-dom' + +export { newParse } from '@vue/compiler-dom'