From: Evan You Date: Tue, 14 Nov 2023 08:35:52 +0000 (+0800) Subject: wip: refactor line / column generation X-Git-Tag: v3.4.0-alpha.2~65 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f7db16b9cfc0859fc8a70015108991182b0a6ab1;p=thirdparty%2Fvuejs%2Fcore.git wip: refactor line / column generation --- diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts index 4dedf9a465..8c2269b1e4 100644 --- a/packages/compiler-core/src/parser/Tokenizer.ts +++ b/packages/compiler-core/src/parser/Tokenizer.ts @@ -27,6 +27,7 @@ import { DecodingMode, htmlDecodeTree } from 'entities/lib/decode.js' +import { Position } from '../ast' const enum CharCodes { Tab = 0x9, // "\t" @@ -120,21 +121,25 @@ export enum QuoteType { } export interface Callbacks { + ontext(start: number, endIndex: number): void + ontextentity(codepoint: number, endIndex: number): void + + onopentagname(start: number, endIndex: number): void + onopentagend(endIndex: number): void + onselfclosingtag(endIndex: number): void + onclosetag(start: number, endIndex: number): void + onattribdata(start: number, endIndex: number): void onattribentity(codepoint: number): void onattribend(quote: QuoteType, endIndex: number): void onattribname(start: number, endIndex: number): void - oncdata(start: number, endIndex: number, endOffset: number): void - onclosetag(start: number, endIndex: number): void + oncomment(start: number, endIndex: number, endOffset: number): void - ondeclaration(start: number, endIndex: number): void + oncdata(start: number, endIndex: number, endOffset: number): void + + // onprocessinginstruction(start: number, endIndex: number): void + // ondeclaration(start: number, endIndex: number): void onend(): void - onopentagend(endIndex: number): void - onopentagname(start: number, endIndex: number): void - onprocessinginstruction(start: number, endIndex: number): void - onselfclosingtag(endIndex: number): void - ontext(start: number, 
endIndex: number): void - ontextentity(codepoint: number, endIndex: number): void } /** @@ -167,15 +172,12 @@ export default class Tokenizer { private baseState = State.Text /** For special parsing behavior inside of script and style tags. */ private isSpecial = false + /** Record newline positions for fast line / column calculation */ + private newlines: number[] = [] private readonly decodeEntities: boolean private readonly entityDecoder: EntityDecoder - public line = 1 - public column = 1 - public startLine = 1 - public startColumn = 1 - constructor( { decodeEntities = true }: { decodeEntities?: boolean }, private readonly cbs: Callbacks @@ -189,20 +191,35 @@ export default class Tokenizer { public reset(): void { this.state = State.Text this.buffer = '' - this.recordStart(0) + this.sectionStart = 0 this.index = 0 - this.line = 1 - this.column = 1 - this.startLine = 1 - this.startColumn = 1 this.baseState = State.Text this.currentSequence = undefined! + this.newlines.length = 0 } - private recordStart(start = this.index) { - this.sectionStart = start - this.startLine = this.line - this.startColumn = this.column + (start - this.index) + /** + * Generate Position object with line / column information using recorded + * newline positions. We know the index is always going to be an already + * processed index, so all the newlines up to this index should have been + * recorded. 
+ */ + public getPositionForIndex(index: number): Position { + let line = 1 + let column = index + 1 + for (let i = this.newlines.length - 1; i >= 0; i--) { + const newlineIndex = this.newlines[i] + if (index > newlineIndex) { + line = i + 2 + column = index - newlineIndex + break + } + } + return { + offset: index, + line, + column + } } private stateText(c: number): void { @@ -214,7 +231,7 @@ export default class Tokenizer { this.cbs.ontext(this.sectionStart, this.index) } this.state = State.BeforeTagName - this.recordStart() + this.sectionStart = this.index } else if (this.decodeEntities && c === CharCodes.Amp) { this.startEntity() } @@ -257,7 +274,7 @@ export default class Tokenizer { } this.isSpecial = false - this.recordStart(endOfText + 2) // Skip over the `" if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { this.state = State.Text - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } } private stateBeforeAttributeName(c: number): void { @@ -440,19 +457,19 @@ export default class Tokenizer { } else { this.state = State.Text } - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } else if (c === CharCodes.Slash) { this.state = State.InSelfClosingTag } else if (!isWhitespace(c)) { this.state = State.InAttributeName - this.recordStart() + this.sectionStart = this.index } } private stateInSelfClosingTag(c: number): void { if (c === CharCodes.Gt) { this.cbs.onselfclosingtag(this.index) this.state = State.Text - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 this.isSpecial = false // Reset special state, in case of self-closing special tags } else if (!isWhitespace(c)) { this.state = State.BeforeAttributeName @@ -462,7 +479,7 @@ export default class Tokenizer { private stateInAttributeName(c: number): void { if (c === CharCodes.Eq || isEndOfTagSection(c)) { this.cbs.onattribname(this.sectionStart, this.index) - this.recordStart() + this.sectionStart = this.index this.state = 
State.AfterAttributeName this.stateAfterAttributeName(c) } @@ -472,24 +489,24 @@ export default class Tokenizer { this.state = State.BeforeAttributeValue } else if (c === CharCodes.Slash || c === CharCodes.Gt) { this.cbs.onattribend(QuoteType.NoValue, this.sectionStart) - this.recordStart(-1) + this.sectionStart = -1 this.state = State.BeforeAttributeName this.stateBeforeAttributeName(c) } else if (!isWhitespace(c)) { this.cbs.onattribend(QuoteType.NoValue, this.sectionStart) this.state = State.InAttributeName - this.recordStart() + this.sectionStart = this.index } } private stateBeforeAttributeValue(c: number): void { if (c === CharCodes.DoubleQuote) { this.state = State.InAttributeValueDq - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } else if (c === CharCodes.SingleQuote) { this.state = State.InAttributeValueSq - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } else if (!isWhitespace(c)) { - this.recordStart() + this.sectionStart = this.index this.state = State.InAttributeValueNq this.stateInAttributeValueNoQuotes(c) // Reconsume token } @@ -497,7 +514,7 @@ export default class Tokenizer { private handleInAttributeValue(c: number, quote: number) { if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) { this.cbs.onattribdata(this.sectionStart, this.index) - this.recordStart(-1) + this.sectionStart = -1 this.cbs.onattribend( quote === CharCodes.DoubleQuote ? 
QuoteType.Double : QuoteType.Single, this.index + 1 @@ -516,7 +533,7 @@ export default class Tokenizer { private stateInAttributeValueNoQuotes(c: number): void { if (isWhitespace(c) || c === CharCodes.Gt) { this.cbs.onattribdata(this.sectionStart, this.index) - this.recordStart(-1) + this.sectionStart = -1 this.cbs.onattribend(QuoteType.Unquoted, this.index) this.state = State.BeforeAttributeName this.stateBeforeAttributeName(c) @@ -535,16 +552,16 @@ export default class Tokenizer { } private stateInDeclaration(c: number): void { if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.cbs.ondeclaration(this.sectionStart, this.index) + // this.cbs.ondeclaration(this.sectionStart, this.index) this.state = State.Text - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } } private stateInProcessingInstruction(c: number): void { if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.cbs.onprocessinginstruction(this.sectionStart, this.index) + // this.cbs.onprocessinginstruction(this.sectionStart, this.index) this.state = State.Text - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } } private stateBeforeComment(c: number): void { @@ -553,7 +570,7 @@ export default class Tokenizer { this.currentSequence = Sequences.CommentEnd // Allow short comments (eg. <!-->
) this.sequenceIndex = 2 - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } else { this.state = State.InDeclaration } @@ -562,7 +579,7 @@ export default class Tokenizer { if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { this.cbs.oncomment(this.sectionStart, this.index, 0) this.state = State.Text - this.recordStart(this.index + 1) + this.sectionStart = this.index + 1 } } private stateBeforeSpecialS(c: number): void { @@ -715,14 +732,10 @@ export default class Tokenizer { break } } - this.index++ - // line / column handling if (c === CharCodes.NewLine) { - this.line++ - this.column = 1 - } else { - this.column++ + this.newlines.push(this.index) } + this.index++ } this.cleanup() this.finish() @@ -739,14 +752,14 @@ export default class Tokenizer { (this.state === State.InSpecialTag && this.sequenceIndex === 0) ) { this.cbs.ontext(this.sectionStart, this.index) - this.recordStart() + this.sectionStart = this.index } else if ( this.state === State.InAttributeValueDq || this.state === State.InAttributeValueSq || this.state === State.InAttributeValueNq ) { this.cbs.onattribdata(this.sectionStart, this.index) - this.recordStart() + this.sectionStart = this.index } } } @@ -805,7 +818,7 @@ export default class Tokenizer { if (this.sectionStart < this.entityStart) { this.cbs.onattribdata(this.sectionStart, this.entityStart) } - this.recordStart(this.entityStart + consumed) + this.sectionStart = this.entityStart + consumed this.index = this.sectionStart - 1 this.cbs.onattribentity(cp) @@ -813,7 +826,7 @@ export default class Tokenizer { if (this.sectionStart < this.entityStart) { this.cbs.ontext(this.sectionStart, this.entityStart) } - this.recordStart(this.entityStart + consumed) + this.sectionStart = this.entityStart + consumed this.index = this.sectionStart - 1 this.cbs.ontextentity(cp, this.sectionStart) diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts index 2c7cf540d0..88c0b5576d 100644 
--- a/packages/compiler-core/src/parser/index.ts +++ b/packages/compiler-core/src/parser/index.ts @@ -228,20 +228,6 @@ const tokenizer = new Tokenizer( endIndex = end // TODO throw error startIndex = end + 1 - }, - - // TODO ignore - ondeclaration(start, end) { - endIndex = end - // TODO onprocessinginstruction - startIndex = end + 1 - }, - - // TODO ignore - onprocessinginstruction(start, end) { - endIndex = end - // TODO onprocessinginstruction - startIndex = end + 1 } } ) @@ -306,12 +292,8 @@ function onText(content: string, start: number, end: number) { type: NodeTypes.TEXT, content, loc: { - start: { - offset: start, - line: tokenizer.startLine, - column: tokenizer.startColumn - }, - end: { offset: end, line: tokenizer.line, column: tokenizer.column }, + start: tokenizer.getPositionForIndex(start), + end: tokenizer.getPositionForIndex(end), source: content } })