From: Eduardo San Martin Morote Date: Wed, 18 Dec 2019 10:11:50 +0000 (+0100) Subject: refactor(parser): rename files and add docs X-Git-Tag: v4.0.0-alpha.0~134 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=48a94ef66d51f1cdf0662f832ff6953ba3f7a9d0;p=thirdparty%2Fvuejs%2Frouter.git refactor(parser): rename files and add docs --- diff --git a/__tests__/matcher/path-parser.spec.ts b/__tests__/matcher/path-parser.spec.ts index 2afdb682..dbcf90e6 100644 --- a/__tests__/matcher/path-parser.spec.ts +++ b/__tests__/matcher/path-parser.spec.ts @@ -1,8 +1,5 @@ -import { - tokenizePath, - TokenType, - tokensToParser, -} from '../../src/matcher/tokenizer' +import { tokenizePath, TokenType } from '../../src/matcher/path-tokenizer' +import { tokensToParser } from '../../src/matcher/path-parser-ranker' describe('Path parser', () => { describe('tokenizer', () => { @@ -28,6 +25,8 @@ describe('Path parser', () => { ]) }) + // not sure how useful this is and if it's worth supporting because of the + // cost to support the ranking as well it.skip('groups', () => { expect(tokenizePath('/one{-b_:id}')).toEqual([ [ @@ -43,7 +42,7 @@ describe('Path parser', () => { ]) }) - // TODO: add test when groups exist + // same as above it.skip('escapes } inside group', () => { expect(tokenizePath('/{\\{}')).toEqual([ [{ type: TokenType.Static, value: '{' }], diff --git a/__tests__/matcher/path-ranking.spec.ts b/__tests__/matcher/path-ranking.spec.ts index b8eddcda..cc5f91ad 100644 --- a/__tests__/matcher/path-ranking.spec.ts +++ b/__tests__/matcher/path-ranking.spec.ts @@ -1,8 +1,8 @@ +import { tokenizePath } from '../../src/matcher/path-tokenizer' import { tokensToParser, - tokenizePath, comparePathParserScore, -} from '../../src/matcher/tokenizer' +} from '../../src/matcher/path-parser-ranker' type PathParserOptions = Parameters[1] diff --git a/src/matcher/tokenizer.ts b/src/matcher/path-parser-ranker.ts similarity index 53% rename from src/matcher/tokenizer.ts rename to 
src/matcher/path-parser-ranker.ts index 91461db9..bdb343d9 100644 --- a/src/matcher/tokenizer.ts +++ b/src/matcher/path-parser-ranker.ts @@ -1,202 +1,43 @@ -export const enum TokenType { - Static, - Param, - Group, -} - -const enum TokenizerState { - Static, - Param, - ParamRegExp, // custom re for a param - ParamRegExpEnd, // check if there is any ? + * - EscapeNext, -} - -interface TokenStatic { - type: TokenType.Static - value: string -} - -interface TokenParam { - type: TokenType.Param - regexp?: string - value: string - optional: boolean - repeatable: boolean -} - -interface TokenGroup { - type: TokenType.Group - value: Exclude[] -} - -type Token = TokenStatic | TokenParam | TokenGroup - -const ROOT_TOKEN: Token = { - type: TokenType.Static, - value: '', -} - -const VALID_PARAM_RE = /[a-zA-Z0-9_]/ - -export function tokenizePath(path: string): Array { - if (!path) return [[]] - if (path === '/') return [[ROOT_TOKEN]] - // remove the leading slash - if (path[0] !== '/') throw new Error('A non-empty path must start with "/"') - - function crash(message: string) { - throw new Error(`ERR (${state})/"${buffer}": ${message}`) - } - - let state: TokenizerState = TokenizerState.Static - let previousState: TokenizerState = state - const tokens: Array = [] - // the segment will always be valid because we get into the initial state - // with the leading / - let segment!: Token[] - - function finalizeSegment() { - if (segment) tokens.push(segment) - segment = [] - } - - // index on the path - let i = 0 - // char at index - let char: string - // buffer of the value read - let buffer: string = '' - // custom regexp for a param - let customRe: string = '' - - function consumeBuffer() { - if (!buffer) return - - if (state === TokenizerState.Static) { - segment.push({ - type: TokenType.Static, - value: buffer, - }) - } else if ( - state === TokenizerState.Param || - state === TokenizerState.ParamRegExp || - state === TokenizerState.ParamRegExpEnd - ) { - if (segment.length > 1 
&& (char === '*' || char === '+')) - crash( - `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.` - ) - segment.push({ - type: TokenType.Param, - value: buffer, - regexp: customRe, - repeatable: char === '*' || char === '+', - optional: char === '*' || char === '?', - }) - } else { - crash('Invalid state to consume buffer') - } - buffer = '' - } - - function addCharToBuffer() { - buffer += char - } - - while (i < path.length) { - char = path[i++] - - if (char === '\\' && state !== TokenizerState.ParamRegExp) { - previousState = state - state = TokenizerState.EscapeNext - continue - } - - switch (state) { - case TokenizerState.Static: - if (char === '/') { - if (buffer) { - consumeBuffer() - } - finalizeSegment() - } else if (char === ':') { - consumeBuffer() - state = TokenizerState.Param - } else if (char === '{') { - // TODO: handle group - addCharToBuffer() - } else { - addCharToBuffer() - } - break - - case TokenizerState.EscapeNext: - addCharToBuffer() - state = previousState - break - - case TokenizerState.Param: - if (char === '(') { - state = TokenizerState.ParamRegExp - customRe = '' - } else if (VALID_PARAM_RE.test(char)) { - addCharToBuffer() - } else { - consumeBuffer() - state = TokenizerState.Static - // go back one character if we were not modifying - if (char !== '*' && char !== '?' && char !== '+') i-- - } - break - - case TokenizerState.ParamRegExp: - if (char === ')') { - // handle the escaped ) - if (customRe[customRe.length - 1] == '\\') - customRe = customRe.slice(0, -1) + char - else state = TokenizerState.ParamRegExpEnd - } else { - customRe += char - } - break - - case TokenizerState.ParamRegExpEnd: - // same as finalizing a param - consumeBuffer() - state = TokenizerState.Static - // go back one character if we were not modifying - if (char !== '*' && char !== '?' 
&& char !== '+') i-- - break - - default: - crash('Unkwnonw state') - break - } - } - - if (state === TokenizerState.ParamRegExp) - crash(`Unfinished custom RegExp for param "${buffer}"`) - - consumeBuffer() - finalizeSegment() - - return tokens -} +import { Token, TokenType } from './path-tokenizer' type Params = Record -interface ParamKey { +/** + * @description A key + */ +interface PathParserParamKey { name: string repeatable: boolean optional: boolean } export interface PathParser { + /** + * The regexp used to match a url + */ re: RegExp + /** + * The score of the parser + */ score: Array - keys: ParamKey[] + /** + * Keys that appeared in the path + */ + keys: PathParserParamKey[] + /** + * Parses a url and returns the matched params or null if it doesn't match. An + * optional param that isn't present will be an empty string. A repeatable + * param will be an array if there is at least one value. + * @param path url to parse + * @returns a Params object, empty if there are no params. `null` if there is + * no match + */ parse(path: string): Params | null + /** + * Creates a string version of the url + * @param params object of params + * @returns a url + */ stringify(params: Params): string } @@ -227,6 +68,7 @@ interface PathParserOptions { decode?: (value: string) => string } +// default pattern for a param: non greedy everything but / const BASE_PARAM_PATTERN = '[^/]+?' 
const BASE_PATH_PARSER_OPTIONS: Required = { @@ -239,6 +81,7 @@ const BASE_PATH_PARSER_OPTIONS: Required = { decode: v => v, } +// Scoring values used in tokensToParser const enum PathScore { _multiplier = 10, Root = 9 * _multiplier, // just / @@ -260,6 +103,7 @@ const enum PathScore { * * @param segments array of segments returned by tokenizePath * @param extraOptions optional options for the regexp + * @returns a PathParser */ export function tokensToParser( segments: Array, @@ -270,15 +114,15 @@ export function tokensToParser( ...extraOptions, } - // the amount of scores is the same as the length of segments + // the amount of scores is the same as the length of segments except for the root segment "/" let score: Array = [] + // the regexp as a string let pattern = options.start ? '^' : '' - const keys: ParamKey[] = [] + // extracted keys + const keys: PathParserParamKey[] = [] for (const segment of segments) { - // allow an empty path to be different from slash - // if (!segment.length) pattern += '/' - + // the root segment needs special treatment const segmentScores: number[] = segment.length ? [] : [PathScore.Root] for (let tokenIndex = 0; tokenIndex < segment.length; tokenIndex++) { @@ -297,26 +141,31 @@ export function tokensToParser( const { value, repeatable, optional, regexp } = token keys.push({ name: value, - repeatable: repeatable, - optional: optional, + repeatable, + optional, }) const re = regexp ? regexp : BASE_PARAM_PATTERN + // the user provided a custom regexp /:id(\\d+) if (re !== BASE_PARAM_PATTERN) { subSegmentScore += PathScore.BonusCustomRegExp + // make sure the regexp is valid before using it try { new RegExp(`(${re})`) } catch (err) { throw new Error( - `Invalid custom RegExp for param "${value}": ` + err.message + `Invalid custom RegExp for param "${value}" (${re}): ` + + err.message ) } } - // (?:\/((?:${re})(?:\/(?:${re}))*)) + + // when we repeat we must take care of the repeating leading slash let subPattern = repeatable ? 
`((?:${re})(?:/(?:${re}))*)` : `(${re})` + // prepend the slash if we are starting a new segment if (!tokenIndex) - subPattern = optional ? `(?:/${subPattern})?` : '/' + subPattern - else subPattern += optional ? '?' : '' + subPattern = optional ? `(?:/${subPattern})` : '/' + subPattern + if (optional) subPattern += '?' pattern += subPattern @@ -362,7 +211,8 @@ export function tokensToParser( function stringify(params: Params): string { let path = '' - let avoidDuplicatedSlash = false + // for optional parameters to allow to be empty + let avoidDuplicatedSlash: boolean = false for (const segment of segments) { if (!avoidDuplicatedSlash || path[path.length - 1] !== '/') path += '/' avoidDuplicatedSlash = false @@ -378,10 +228,12 @@ export function tokensToParser( throw new Error( `Provided param "${value}" is an array but it is not repeatable (* or + modifiers)` ) + // TODO: encode, decode values, make sure that wildcard regexp do not encode the `/` const text: string = Array.isArray(param) ? param.join('/') : param if (!text) { - if (!optional) throw new Error(`Missing required param "${value}"`) - else avoidDuplicatedSlash = true + // do not append a slash on the next iteration + if (optional) avoidDuplicatedSlash = true + else throw new Error(`Missing required param "${value}"`) } path += text } @@ -400,16 +252,26 @@ export function tokensToParser( } } -export function compareScoreArray(a: number[], b: number[]): number { +/** + * Compares an array of numbers as used in PathParser.score and returns a + * number. 
This function can be used to `sort` an array + * @param a first array of numbers + * @param b second array of numbers + * @returns 0 if both are equal, < 0 if a should be sorted first, > 0 if b + * should be sorted first + */ +function compareScoreArray(a: number[], b: number[]): number { let i = 0 while (i < a.length && i < b.length) { - if (a[i] < b[i]) return 1 - if (a[i] > b[i]) return -1 + const diff = b[i] - a[i] + // only keep going if diff === 0 + if (diff) return diff i++ } - // if the last subsegment was Static, the shorter + // if the last subsegment was Static, the shorter segments should be sorted first + // otherwise sort the longest segment first if (a.length < b.length) { return a.length === 1 && a[0] === PathScore.Static + PathScore.Segment ? -1 @@ -423,6 +285,12 @@ export function compareScoreArray(a: number[], b: number[]): number { return 0 } +/** + * Compare function that can be used with `sort` to sort an array of PathParser + * @param a first PathParser + * @param b second PathParser + * @returns 0 if both are equal, < 0 if a should be sorted first, > 0 if b should be sorted first + */ export function comparePathParserScore(a: PathParser, b: PathParser): number { let i = 0 const aScore = a.score @@ -435,13 +303,12 @@ export function comparePathParserScore(a: PathParser, b: PathParser): number { i++ } - // TODO: one is this way the other the opposite it's more complicated than - // that because with subsegments the length matters while with segment it - // doesnt (1 vs 1+). So I need to treat the first entry of each array - // differently - return aScore.length < bScore.length - ? 1 - : aScore.length > bScore.length - ? -1 - : 0 + // if a and b share the same score entries but b has more, sort b first + return bScore.length - aScore.length + // this is the ternary version + // return aScore.length < bScore.length + // ? 1 + // : aScore.length > bScore.length + // ? 
-1 + // : 0 } diff --git a/src/matcher/path-ranker.ts b/src/matcher/path-ranker.ts deleted file mode 100644 index ca6ccf04..00000000 --- a/src/matcher/path-ranker.ts +++ /dev/null @@ -1,11 +0,0 @@ -export function comparePathParserScore(a: number[], b: number[]): number { - let i = 0 - while (i < a.length && i < b.length) { - if (a[i] < b[i]) return 1 - if (a[i] > b[i]) return -1 - - i++ - } - - return a.length < b.length ? 1 : a.length > b.length ? -1 : 0 -} diff --git a/src/matcher/path-tokenizer.ts b/src/matcher/path-tokenizer.ts new file mode 100644 index 00000000..d68bad38 --- /dev/null +++ b/src/matcher/path-tokenizer.ts @@ -0,0 +1,185 @@ +export const enum TokenType { + Static, + Param, + Group, +} + +const enum TokenizerState { + Static, + Param, + ParamRegExp, // custom re for a param + ParamRegExpEnd, // check if there is any ? + * + EscapeNext, +} + +interface TokenStatic { + type: TokenType.Static + value: string +} + +interface TokenParam { + type: TokenType.Param + regexp?: string + value: string + optional: boolean + repeatable: boolean +} + +interface TokenGroup { + type: TokenType.Group + value: Exclude[] +} + +export type Token = TokenStatic | TokenParam | TokenGroup + +const ROOT_TOKEN: Token = { + type: TokenType.Static, + value: '', +} + +const VALID_PARAM_RE = /[a-zA-Z0-9_]/ + +export function tokenizePath(path: string): Array { + if (!path) return [[]] + if (path === '/') return [[ROOT_TOKEN]] + // remove the leading slash + if (path[0] !== '/') throw new Error('A non-empty path must start with "/"') + + function crash(message: string) { + throw new Error(`ERR (${state})/"${buffer}": ${message}`) + } + + let state: TokenizerState = TokenizerState.Static + let previousState: TokenizerState = state + const tokens: Array = [] + // the segment will always be valid because we get into the initial state + // with the leading / + let segment!: Token[] + + function finalizeSegment() { + if (segment) tokens.push(segment) + segment = [] + } + + // 
index on the path + let i = 0 + // char at index + let char: string + // buffer of the value read + let buffer: string = '' + // custom regexp for a param + let customRe: string = '' + + function consumeBuffer() { + if (!buffer) return + + if (state === TokenizerState.Static) { + segment.push({ + type: TokenType.Static, + value: buffer, + }) + } else if ( + state === TokenizerState.Param || + state === TokenizerState.ParamRegExp || + state === TokenizerState.ParamRegExpEnd + ) { + if (segment.length > 1 && (char === '*' || char === '+')) + crash( + `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.` + ) + segment.push({ + type: TokenType.Param, + value: buffer, + regexp: customRe, + repeatable: char === '*' || char === '+', + optional: char === '*' || char === '?', + }) + } else { + crash('Invalid state to consume buffer') + } + buffer = '' + } + + function addCharToBuffer() { + buffer += char + } + + while (i < path.length) { + char = path[i++] + + if (char === '\\' && state !== TokenizerState.ParamRegExp) { + previousState = state + state = TokenizerState.EscapeNext + continue + } + + switch (state) { + case TokenizerState.Static: + if (char === '/') { + if (buffer) { + consumeBuffer() + } + finalizeSegment() + } else if (char === ':') { + consumeBuffer() + state = TokenizerState.Param + } else if (char === '{') { + // TODO: handle group + addCharToBuffer() + } else { + addCharToBuffer() + } + break + + case TokenizerState.EscapeNext: + addCharToBuffer() + state = previousState + break + + case TokenizerState.Param: + if (char === '(') { + state = TokenizerState.ParamRegExp + customRe = '' + } else if (VALID_PARAM_RE.test(char)) { + addCharToBuffer() + } else { + consumeBuffer() + state = TokenizerState.Static + // go back one character if we were not modifying + if (char !== '*' && char !== '?' 
&& char !== '+') i-- + } + break + + case TokenizerState.ParamRegExp: + if (char === ')') { + // handle the escaped ) + if (customRe[customRe.length - 1] == '\\') + customRe = customRe.slice(0, -1) + char + else state = TokenizerState.ParamRegExpEnd + } else { + customRe += char + } + break + + case TokenizerState.ParamRegExpEnd: + // same as finalizing a param + consumeBuffer() + state = TokenizerState.Static + // go back one character if we were not modifying + if (char !== '*' && char !== '?' && char !== '+') i-- + break + + default: + crash('Unkwnonw state') + break + } + } + + if (state === TokenizerState.ParamRegExp) + crash(`Unfinished custom RegExp for param "${buffer}"`) + + consumeBuffer() + finalizeSegment() + + return tokens +}