From: Evan You Date: Sun, 12 Nov 2023 08:58:24 +0000 (+0800) Subject: wip: setup X-Git-Tag: v3.4.0-alpha.2~70 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5b9141cc308c992ee40bf823732776adc4208940;p=thirdparty%2Fvuejs%2Fcore.git wip: setup --- diff --git a/packages/compiler-core/package.json b/packages/compiler-core/package.json index 95c5f00adc..b5d7bfe0d7 100644 --- a/packages/compiler-core/package.json +++ b/packages/compiler-core/package.json @@ -34,6 +34,7 @@ "dependencies": { "@babel/parser": "^7.23.3", "@vue/shared": "workspace:*", + "entities": "^4.5.0", "estree-walker": "^2.0.2", "source-map-js": "^1.0.2" }, diff --git a/packages/compiler-core/src/parser/Parser.ts b/packages/compiler-core/src/parser/Parser.ts new file mode 100644 index 0000000000..fba7fde6c8 --- /dev/null +++ b/packages/compiler-core/src/parser/Parser.ts @@ -0,0 +1,653 @@ +import Tokenizer, { Callbacks, QuoteType } from './Tokenizer.js' +import { fromCodePoint } from 'entities/lib/decode.js' + +const formTags = new Set([ + 'input', + 'option', + 'optgroup', + 'select', + 'button', + 'datalist', + 'textarea' +]) +const pTag = new Set(['p']) +const tableSectionTags = new Set(['thead', 'tbody']) +const ddtTags = new Set(['dd', 'dt']) +const rtpTags = new Set(['rt', 'rp']) + +const openImpliesClose = new Map>([ + ['tr', new Set(['tr', 'th', 'td'])], + ['th', new Set(['th'])], + ['td', new Set(['thead', 'th', 'td'])], + ['body', new Set(['head', 'link', 'script'])], + ['li', new Set(['li'])], + ['p', pTag], + ['h1', pTag], + ['h2', pTag], + ['h3', pTag], + ['h4', pTag], + ['h5', pTag], + ['h6', pTag], + ['select', formTags], + ['input', formTags], + ['output', formTags], + ['button', formTags], + ['datalist', formTags], + ['textarea', formTags], + ['option', new Set(['option'])], + ['optgroup', new Set(['optgroup', 'option'])], + ['dd', ddtTags], + ['dt', ddtTags], + ['address', pTag], + ['article', pTag], + ['aside', pTag], + ['blockquote', pTag], + ['details', pTag], + ['div', pTag], + ['dl', pTag], + ['fieldset', pTag], + ['figcaption', pTag], + ['figure', pTag], + ['footer', pTag], + ['form', pTag], + ['header', pTag], + ['hr', pTag], + ['main', pTag], + ['nav', pTag], + ['ol', pTag], + ['pre', pTag], + ['section', pTag], + ['table', pTag], + ['ul', pTag], + ['rt', rtpTags], + ['rp', rtpTags], + ['tbody', tableSectionTags], + ['tfoot', tableSectionTags] +]) + +const voidElements = new Set([ + 'area', + 'base', + 'basefont', + 'br', + 'col', + 'command', + 'embed', + 'frame', + 'hr', + 'img', + 'input', + 'isindex', + 'keygen', + 'link', + 'meta', + 'param', + 'source', + 'track', + 'wbr' +]) + +const foreignContextElements = new Set(['math', 'svg']) + +const htmlIntegrationElements = new Set([ + 'mi', + 'mo', + 'mn', + 'ms', + 'mtext', + 'annotation-xml', + 'foreignobject', + 'desc', + 'title' +]) + +export interface ParserOptions { + /** + * Indicates whether special tags (``. + this.sequenceIndex = Number(c === CharCodes.Lt) + } + } + + private stateCDATASequence(c: number): void { + if (c === Sequences.Cdata[this.sequenceIndex]) { + if (++this.sequenceIndex === Sequences.Cdata.length) { + this.state = State.InCommentLike + this.currentSequence = Sequences.CdataEnd + this.sequenceIndex = 0 + this.sectionStart = this.index + 1 + } + } else { + this.sequenceIndex = 0 + this.state = State.InDeclaration + this.stateInDeclaration(c) // Reconsume the character + } + } + + /** + * When we wait for one specific character, we can speed things up + * by skipping through the buffer until we find it. + * + * @returns Whether the character was found. + */ + private fastForwardTo(c: number): boolean { + while (++this.index < this.buffer.length + this.offset) { + if (this.buffer.charCodeAt(this.index - this.offset) === c) { + return true + } + } + + /* + * We increment the index at the end of the `parse` loop, + * so set it to `buffer.length - 1` here. + * + * TODO: Refactor `parse` to increment index before calling states. + */ + this.index = this.buffer.length + this.offset - 1 + + return false + } + + /** + * Comments and CDATA end with `-->` and `]]>`. + * + * Their common qualities are: + * - Their end sequences have a distinct character they start with. + * - That character is then repeated, so we have to check multiple repeats. + * - All characters but the start character of the sequence can be skipped. + */ + private stateInCommentLike(c: number): void { + if (c === this.currentSequence[this.sequenceIndex]) { + if (++this.sequenceIndex === this.currentSequence.length) { + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(this.sectionStart, this.index, 2) + } else { + this.cbs.oncomment(this.sectionStart, this.index, 2) + } + + this.sequenceIndex = 0 + this.sectionStart = this.index + 1 + this.state = State.Text + } + } else if (this.sequenceIndex === 0) { + // Fast-forward to the first character of the sequence + if (this.fastForwardTo(this.currentSequence[0])) { + this.sequenceIndex = 1 + } + } else if (c !== this.currentSequence[this.sequenceIndex - 1]) { + // Allow long sequences, eg. --->, ]]]> + this.sequenceIndex = 0 + } + } + + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. + */ + private isTagStartChar(c: number) { + return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c) + } + + private startSpecial(sequence: Uint8Array, offset: number) { + this.isSpecial = true + this.currentSequence = sequence + this.sequenceIndex = offset + this.state = State.SpecialStartSequence + } + + private stateBeforeTagName(c: number): void { + if (c === CharCodes.ExclamationMark) { + this.state = State.BeforeDeclaration + this.sectionStart = this.index + 1 + } else if (c === CharCodes.Questionmark) { + this.state = State.InProcessingInstruction + this.sectionStart = this.index + 1 + } else if (this.isTagStartChar(c)) { + const lower = c | 0x20 + this.sectionStart = this.index + if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { + this.startSpecial(Sequences.TitleEnd, 3) + } else { + this.state = + !this.xmlMode && lower === Sequences.ScriptEnd[2] + ? State.BeforeSpecialS + : State.InTagName + } + } else if (c === CharCodes.Slash) { + this.state = State.BeforeClosingTagName + } else { + this.state = State.Text + this.stateText(c) + } + } + private stateInTagName(c: number): void { + if (isEndOfTagSection(c)) { + this.cbs.onopentagname(this.sectionStart, this.index) + this.sectionStart = -1 + this.state = State.BeforeAttributeName + this.stateBeforeAttributeName(c) + } + } + private stateBeforeClosingTagName(c: number): void { + if (isWhitespace(c)) { + // Ignore + } else if (c === CharCodes.Gt) { + this.state = State.Text + } else { + this.state = this.isTagStartChar(c) + ? State.InClosingTagName + : State.InSpecialComment + this.sectionStart = this.index + } + } + private stateInClosingTagName(c: number): void { + if (c === CharCodes.Gt || isWhitespace(c)) { + this.cbs.onclosetag(this.sectionStart, this.index) + this.sectionStart = -1 + this.state = State.AfterClosingTagName + this.stateAfterClosingTagName(c) + } + } + private stateAfterClosingTagName(c: number): void { + // Skip everything until ">" + if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { + this.state = State.Text + this.sectionStart = this.index + 1 + } + } + private stateBeforeAttributeName(c: number): void { + if (c === CharCodes.Gt) { + this.cbs.onopentagend(this.index) + if (this.isSpecial) { + this.state = State.InSpecialTag + this.sequenceIndex = 0 + } else { + this.state = State.Text + } + this.sectionStart = this.index + 1 + } else if (c === CharCodes.Slash) { + this.state = State.InSelfClosingTag + } else if (!isWhitespace(c)) { + this.state = State.InAttributeName + this.sectionStart = this.index + } + } + private stateInSelfClosingTag(c: number): void { + if (c === CharCodes.Gt) { + this.cbs.onselfclosingtag(this.index) + this.state = State.Text + this.sectionStart = this.index + 1 + this.isSpecial = false // Reset special state, in case of self-closing special tags + } else if (!isWhitespace(c)) { + this.state = State.BeforeAttributeName + this.stateBeforeAttributeName(c) + } + } + private stateInAttributeName(c: number): void { + if (c === CharCodes.Eq || isEndOfTagSection(c)) { + this.cbs.onattribname(this.sectionStart, this.index) + this.sectionStart = this.index + this.state = State.AfterAttributeName + this.stateAfterAttributeName(c) + } + } + private stateAfterAttributeName(c: number): void { + if (c === CharCodes.Eq) { + this.state = State.BeforeAttributeValue + } else if (c === CharCodes.Slash || c === CharCodes.Gt) { + this.cbs.onattribend(QuoteType.NoValue, this.sectionStart) + this.sectionStart = -1 + this.state = State.BeforeAttributeName + this.stateBeforeAttributeName(c) + } else if (!isWhitespace(c)) { + this.cbs.onattribend(QuoteType.NoValue, this.sectionStart) + this.state = State.InAttributeName + this.sectionStart = this.index + } + } + private stateBeforeAttributeValue(c: number): void { + if (c === CharCodes.DoubleQuote) { + this.state = State.InAttributeValueDq + this.sectionStart = this.index + 1 + } else if (c === CharCodes.SingleQuote) { + this.state = State.InAttributeValueSq + this.sectionStart = this.index + 1 + } else if (!isWhitespace(c)) { + this.sectionStart = this.index + this.state = State.InAttributeValueNq + this.stateInAttributeValueNoQuotes(c) // Reconsume token + } + } + private handleInAttributeValue(c: number, quote: number) { + if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) { + this.cbs.onattribdata(this.sectionStart, this.index) + this.sectionStart = -1 + this.cbs.onattribend( + quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single, + this.index + 1 + ) + this.state = State.BeforeAttributeName + } else if (this.decodeEntities && c === CharCodes.Amp) { + this.startEntity() + } + } + private stateInAttributeValueDoubleQuotes(c: number): void { + this.handleInAttributeValue(c, CharCodes.DoubleQuote) + } + private stateInAttributeValueSingleQuotes(c: number): void { + this.handleInAttributeValue(c, CharCodes.SingleQuote) + } + private stateInAttributeValueNoQuotes(c: number): void { + if (isWhitespace(c) || c === CharCodes.Gt) { + this.cbs.onattribdata(this.sectionStart, this.index) + this.sectionStart = -1 + this.cbs.onattribend(QuoteType.Unquoted, this.index) + this.state = State.BeforeAttributeName + this.stateBeforeAttributeName(c) + } else if (this.decodeEntities && c === CharCodes.Amp) { + this.startEntity() + } + } + private stateBeforeDeclaration(c: number): void { + if (c === CharCodes.OpeningSquareBracket) { + this.state = State.CDATASequence + this.sequenceIndex = 0 + } else { + this.state = + c === CharCodes.Dash ? State.BeforeComment : State.InDeclaration + } + } + private stateInDeclaration(c: number): void { + if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { + this.cbs.ondeclaration(this.sectionStart, this.index) + this.state = State.Text + this.sectionStart = this.index + 1 + } + } + private stateInProcessingInstruction(c: number): void { + if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { + this.cbs.onprocessinginstruction(this.sectionStart, this.index) + this.state = State.Text + this.sectionStart = this.index + 1 + } + } + private stateBeforeComment(c: number): void { + if (c === CharCodes.Dash) { + this.state = State.InCommentLike + this.currentSequence = Sequences.CommentEnd + // Allow short comments (eg. ) + this.sequenceIndex = 2 + this.sectionStart = this.index + 1 + } else { + this.state = State.InDeclaration + } + } + private stateInSpecialComment(c: number): void { + if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { + this.cbs.oncomment(this.sectionStart, this.index, 0) + this.state = State.Text + this.sectionStart = this.index + 1 + } + } + private stateBeforeSpecialS(c: number): void { + const lower = c | 0x20 + if (lower === Sequences.ScriptEnd[3]) { + this.startSpecial(Sequences.ScriptEnd, 4) + } else if (lower === Sequences.StyleEnd[3]) { + this.startSpecial(Sequences.StyleEnd, 4) + } else { + this.state = State.InTagName + this.stateInTagName(c) // Consume the token again + } + } + + private startEntity() { + this.baseState = this.state + this.state = State.InEntity + this.entityStart = this.index + this.entityDecoder.startEntity( + this.xmlMode + ? DecodingMode.Strict + : this.baseState === State.Text || this.baseState === State.InSpecialTag + ? DecodingMode.Legacy + : DecodingMode.Attribute + ) + } + + private stateInEntity(): void { + const length = this.entityDecoder.write( + this.buffer, + this.index - this.offset + ) + + // If `length` is positive, we are done with the entity. + if (length >= 0) { + this.state = this.baseState + + if (length === 0) { + this.index = this.entityStart + } + } else { + // Mark buffer as consumed. + this.index = this.offset + this.buffer.length - 1 + } + } + + /** + * Remove data that has already been consumed from the buffer. + */ + private cleanup() { + // If we are inside of text or attributes, emit what we already have. + if (this.running && this.sectionStart !== this.index) { + if ( + this.state === State.Text || + (this.state === State.InSpecialTag && this.sequenceIndex === 0) + ) { + this.cbs.ontext(this.sectionStart, this.index) + this.sectionStart = this.index + } else if ( + this.state === State.InAttributeValueDq || + this.state === State.InAttributeValueSq || + this.state === State.InAttributeValueNq + ) { + this.cbs.onattribdata(this.sectionStart, this.index) + this.sectionStart = this.index + } + } + } + + private shouldContinue() { + return this.index < this.buffer.length + this.offset && this.running + } + + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + private parse() { + while (this.shouldContinue()) { + const c = this.buffer.charCodeAt(this.index - this.offset) + switch (this.state) { + case State.Text: { + this.stateText(c) + break + } + case State.SpecialStartSequence: { + this.stateSpecialStartSequence(c) + break + } + case State.InSpecialTag: { + this.stateInSpecialTag(c) + break + } + case State.CDATASequence: { + this.stateCDATASequence(c) + break + } + case State.InAttributeValueDq: { + this.stateInAttributeValueDoubleQuotes(c) + break + } + case State.InAttributeName: { + this.stateInAttributeName(c) + break + } + case State.InCommentLike: { + this.stateInCommentLike(c) + break + } + case State.InSpecialComment: { + this.stateInSpecialComment(c) + break + } + case State.BeforeAttributeName: { + this.stateBeforeAttributeName(c) + break + } + case State.InTagName: { + this.stateInTagName(c) + break + } + case State.InClosingTagName: { + this.stateInClosingTagName(c) + break + } + case State.BeforeTagName: { + this.stateBeforeTagName(c) + break + } + case State.AfterAttributeName: { + this.stateAfterAttributeName(c) + break + } + case State.InAttributeValueSq: { + this.stateInAttributeValueSingleQuotes(c) + break + } + case State.BeforeAttributeValue: { + this.stateBeforeAttributeValue(c) + break + } + case State.BeforeClosingTagName: { + this.stateBeforeClosingTagName(c) + break + } + case State.AfterClosingTagName: { + this.stateAfterClosingTagName(c) + break + } + case State.BeforeSpecialS: { + this.stateBeforeSpecialS(c) + break + } + case State.InAttributeValueNq: { + this.stateInAttributeValueNoQuotes(c) + break + } + case State.InSelfClosingTag: { + this.stateInSelfClosingTag(c) + break + } + case State.InDeclaration: { + this.stateInDeclaration(c) + break + } + case State.BeforeDeclaration: { + this.stateBeforeDeclaration(c) + break + } + case State.BeforeComment: { + this.stateBeforeComment(c) + break + } + case State.InProcessingInstruction: { + this.stateInProcessingInstruction(c) + break + } + case State.InEntity: { + this.stateInEntity() + break + } + } + this.index++ + } + this.cleanup() + } + + private finish() { + if (this.state === State.InEntity) { + this.entityDecoder.end() + this.state = this.baseState + } + + this.handleTrailingData() + + this.cbs.onend() + } + + /** Handle any trailing data. */ + private handleTrailingData() { + const endIndex = this.buffer.length + this.offset + + // If there is no remaining data, we are done. + if (this.sectionStart >= endIndex) { + return + } + + if (this.state === State.InCommentLike) { + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(this.sectionStart, endIndex, 0) + } else { + this.cbs.oncomment(this.sectionStart, endIndex, 0) + } + } else if ( + this.state === State.InTagName || + this.state === State.BeforeAttributeName || + this.state === State.BeforeAttributeValue || + this.state === State.AfterAttributeName || + this.state === State.InAttributeName || + this.state === State.InAttributeValueSq || + this.state === State.InAttributeValueDq || + this.state === State.InAttributeValueNq || + this.state === State.InClosingTagName + ) { + /* + * If we are currently in an opening or closing tag, us not calling the + * respective callback signals that the tag should be ignored. + */ + } else { + this.cbs.ontext(this.sectionStart, endIndex) + } + } + + private emitCodePoint(cp: number, consumed: number): void { + if ( + this.baseState !== State.Text && + this.baseState !== State.InSpecialTag + ) { + if (this.sectionStart < this.entityStart) { + this.cbs.onattribdata(this.sectionStart, this.entityStart) + } + this.sectionStart = this.entityStart + consumed + this.index = this.sectionStart - 1 + + this.cbs.onattribentity(cp) + } else { + if (this.sectionStart < this.entityStart) { + this.cbs.ontext(this.sectionStart, this.entityStart) + } + this.sectionStart = this.entityStart + consumed + this.index = this.sectionStart - 1 + + this.cbs.ontextentity(cp, this.sectionStart) + } + } +} diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts new file mode 100644 index 0000000000..660101d081 --- /dev/null +++ b/packages/compiler-core/src/parser/index.ts @@ -0,0 +1,14 @@ +import { RootNode, createRoot } from '../ast' +import { ParserOptions } from '../options' +import { Parser } from './Parser' + +export function baseParse( + content: string, + options: ParserOptions = {} +): RootNode { + const root = createRoot([]) + new Parser({ + // TODO + }).end(content) + return root +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0b9e554667..c625bda378 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -164,6 +164,9 @@ importers: '@vue/shared': specifier: workspace:* version: link:../shared + entities: + specifier: ^4.5.0 + version: 4.5.0 estree-walker: specifier: ^2.0.2 version: 2.0.2 @@ -2668,7 +2671,6 @@ packages: /entities@4.5.0: resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} engines: {node: '>=0.12'} - dev: true /error-ex@1.3.2: resolution: {integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==}