-export const enum TokenType {
- Static,
- Param,
- Group,
-}
-
-const enum TokenizerState {
- Static,
- Param,
- ParamRegExp, // custom re for a param
- ParamRegExpEnd, // check if there is any ? + *
- EscapeNext,
-}
-
-interface TokenStatic {
- type: TokenType.Static
- value: string
-}
-
-interface TokenParam {
- type: TokenType.Param
- regexp?: string
- value: string
- optional: boolean
- repeatable: boolean
-}
-
-interface TokenGroup {
- type: TokenType.Group
- value: Exclude<Token, TokenGroup>[]
-}
-
-type Token = TokenStatic | TokenParam | TokenGroup
-
-const ROOT_TOKEN: Token = {
- type: TokenType.Static,
- value: '',
-}
-
-const VALID_PARAM_RE = /[a-zA-Z0-9_]/
-
-export function tokenizePath(path: string): Array<Token[]> {
- if (!path) return [[]]
- if (path === '/') return [[ROOT_TOKEN]]
- // remove the leading slash
- if (path[0] !== '/') throw new Error('A non-empty path must start with "/"')
-
- function crash(message: string) {
- throw new Error(`ERR (${state})/"${buffer}": ${message}`)
- }
-
- let state: TokenizerState = TokenizerState.Static
- let previousState: TokenizerState = state
- const tokens: Array<Token[]> = []
- // the segment will always be valid because we get into the initial state
- // with the leading /
- let segment!: Token[]
-
- function finalizeSegment() {
- if (segment) tokens.push(segment)
- segment = []
- }
-
- // index on the path
- let i = 0
- // char at index
- let char: string
- // buffer of the value read
- let buffer: string = ''
- // custom regexp for a param
- let customRe: string = ''
-
- function consumeBuffer() {
- if (!buffer) return
-
- if (state === TokenizerState.Static) {
- segment.push({
- type: TokenType.Static,
- value: buffer,
- })
- } else if (
- state === TokenizerState.Param ||
- state === TokenizerState.ParamRegExp ||
- state === TokenizerState.ParamRegExpEnd
- ) {
- if (segment.length > 1 && (char === '*' || char === '+'))
- crash(
- `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.`
- )
- segment.push({
- type: TokenType.Param,
- value: buffer,
- regexp: customRe,
- repeatable: char === '*' || char === '+',
- optional: char === '*' || char === '?',
- })
- } else {
- crash('Invalid state to consume buffer')
- }
- buffer = ''
- }
-
- function addCharToBuffer() {
- buffer += char
- }
-
- while (i < path.length) {
- char = path[i++]
-
- if (char === '\\' && state !== TokenizerState.ParamRegExp) {
- previousState = state
- state = TokenizerState.EscapeNext
- continue
- }
-
- switch (state) {
- case TokenizerState.Static:
- if (char === '/') {
- if (buffer) {
- consumeBuffer()
- }
- finalizeSegment()
- } else if (char === ':') {
- consumeBuffer()
- state = TokenizerState.Param
- } else if (char === '{') {
- // TODO: handle group
- addCharToBuffer()
- } else {
- addCharToBuffer()
- }
- break
-
- case TokenizerState.EscapeNext:
- addCharToBuffer()
- state = previousState
- break
-
- case TokenizerState.Param:
- if (char === '(') {
- state = TokenizerState.ParamRegExp
- customRe = ''
- } else if (VALID_PARAM_RE.test(char)) {
- addCharToBuffer()
- } else {
- consumeBuffer()
- state = TokenizerState.Static
- // go back one character if we were not modifying
- if (char !== '*' && char !== '?' && char !== '+') i--
- }
- break
-
- case TokenizerState.ParamRegExp:
- if (char === ')') {
- // handle the escaped )
- if (customRe[customRe.length - 1] == '\\')
- customRe = customRe.slice(0, -1) + char
- else state = TokenizerState.ParamRegExpEnd
- } else {
- customRe += char
- }
- break
-
- case TokenizerState.ParamRegExpEnd:
- // same as finalizing a param
- consumeBuffer()
- state = TokenizerState.Static
- // go back one character if we were not modifying
- if (char !== '*' && char !== '?' && char !== '+') i--
- break
-
- default:
- crash('Unkwnonw state')
- break
- }
- }
-
- if (state === TokenizerState.ParamRegExp)
- crash(`Unfinished custom RegExp for param "${buffer}"`)
-
- consumeBuffer()
- finalizeSegment()
-
- return tokens
-}
+import { Token, TokenType } from './path-tokenizer'
type Params = Record<string, string | string[]>
-interface ParamKey {
+/**
+ * @description A key of a param that appears in the path, with its modifiers
+ */
+interface PathParserParamKey {
name: string
repeatable: boolean
optional: boolean
}
export interface PathParser {
+ /**
+ * The regexp used to match a url
+ */
re: RegExp
+ /**
+ * The score of the parser
+ */
score: Array<number[]>
- keys: ParamKey[]
+ /**
+ * Keys that appeared in the path
+ */
+ keys: PathParserParamKey[]
+ /**
+ * Parses a url and returns the matched params or null if it doesn't match. An
+ * optional param that isn't present will be an empty string. A repeatable
+ * param will be an array if there is at least one value.
+ * @param path url to parse
+ * @returns a Params object, empty if there are no params. `null` if there is
+ * no match
+ */
parse(path: string): Params | null
+ /**
+ * Creates a string version of the url
+ * @param params object of params
+ * @returns a url
+ */
stringify(params: Params): string
}
decode?: (value: string) => string
}
+// default pattern for a param: non greedy everything but /
const BASE_PARAM_PATTERN = '[^/]+?'
const BASE_PATH_PARSER_OPTIONS: Required<PathParserOptions> = {
decode: v => v,
}
+// Scoring values used in tokensToParser
const enum PathScore {
_multiplier = 10,
Root = 9 * _multiplier, // just /
*
* @param segments array of segments returned by tokenizePath
* @param extraOptions optional options for the regexp
+ * @returns a PathParser
*/
export function tokensToParser(
segments: Array<Token[]>,
...extraOptions,
}
- // the amount of scores is the same as the length of segments
+ // the amount of scores is the same as the length of segments except for the root segment "/"
let score: Array<number[]> = []
+ // the regexp as a string
let pattern = options.start ? '^' : ''
- const keys: ParamKey[] = []
+ // extracted keys
+ const keys: PathParserParamKey[] = []
for (const segment of segments) {
- // allow an empty path to be different from slash
- // if (!segment.length) pattern += '/'
-
+ // the root segment needs special treatment
const segmentScores: number[] = segment.length ? [] : [PathScore.Root]
for (let tokenIndex = 0; tokenIndex < segment.length; tokenIndex++) {
const { value, repeatable, optional, regexp } = token
keys.push({
name: value,
- repeatable: repeatable,
- optional: optional,
+ repeatable,
+ optional,
})
const re = regexp ? regexp : BASE_PARAM_PATTERN
+ // the user provided a custom regexp /:id(\\d+)
if (re !== BASE_PARAM_PATTERN) {
subSegmentScore += PathScore.BonusCustomRegExp
+ // make sure the regexp is valid before using it
try {
new RegExp(`(${re})`)
} catch (err) {
throw new Error(
- `Invalid custom RegExp for param "${value}": ` + err.message
+ `Invalid custom RegExp for param "${value}" (${re}): ` +
+ err.message
)
}
}
- // (?:\/((?:${re})(?:\/(?:${re}))*))
+
+ // when we repeat we must take care of the repeating leading slash
let subPattern = repeatable ? `((?:${re})(?:/(?:${re}))*)` : `(${re})`
+ // prepend the slash if we are starting a new segment
if (!tokenIndex)
- subPattern = optional ? `(?:/${subPattern})?` : '/' + subPattern
- else subPattern += optional ? '?' : ''
+ subPattern = optional ? `(?:/${subPattern})` : '/' + subPattern
+ if (optional) subPattern += '?'
pattern += subPattern
function stringify(params: Params): string {
let path = ''
- let avoidDuplicatedSlash = false
+ // for optional parameters to allow to be empty
+ let avoidDuplicatedSlash: boolean = false
for (const segment of segments) {
if (!avoidDuplicatedSlash || path[path.length - 1] !== '/') path += '/'
avoidDuplicatedSlash = false
throw new Error(
`Provided param "${value}" is an array but it is not repeatable (* or + modifiers)`
)
+ // TODO: encode, decode values, make sure that wildcard regexps do not encode the `/`
const text: string = Array.isArray(param) ? param.join('/') : param
if (!text) {
- if (!optional) throw new Error(`Missing required param "${value}"`)
- else avoidDuplicatedSlash = true
+ // do not append a slash on the next iteration
+ if (optional) avoidDuplicatedSlash = true
+ else throw new Error(`Missing required param "${value}"`)
}
path += text
}
}
}
-export function compareScoreArray(a: number[], b: number[]): number {
+/**
+ * Compares an array of numbers as used in PathParser.score and returns a
+ * number. This function can be used to `sort` an array
+ * @param a first array of numbers
+ * @param b second array of numbers
+ * @returns 0 if both are equal, < 0 if a should be sorted first, > 0 if b
+ * should be sorted first
+ */
+function compareScoreArray(a: number[], b: number[]): number {
let i = 0
while (i < a.length && i < b.length) {
- if (a[i] < b[i]) return 1
- if (a[i] > b[i]) return -1
+ const diff = b[i] - a[i]
+ // only keep going if diff === 0
+ if (diff) return diff
i++
}
- // if the last subsegment was Static, the shorter
+ // if the last subsegment was Static, the shorter segments should be sorted first
+ // otherwise sort the longest segment first
if (a.length < b.length) {
return a.length === 1 && a[0] === PathScore.Static + PathScore.Segment
? -1
return 0
}
+/**
+ * Compare function that can be used with `sort` to sort an array of PathParser
+ * @param a first PathParser
+ * @param b second PathParser
+ * @returns 0 if both are equal, < 0 if a should be sorted first, > 0 if b should be sorted first
+ */
export function comparePathParserScore(a: PathParser, b: PathParser): number {
let i = 0
const aScore = a.score
i++
}
- // TODO: one is this way the other the opposite it's more complicated than
- // that because with subsegments the length matters while with segment it
- // doesnt (1 vs 1+). So I need to treat the first entry of each array
- // differently
- return aScore.length < bScore.length
- ? 1
- : aScore.length > bScore.length
- ? -1
- : 0
+ // if a and b share the same score entries but b has more, sort b first
+ return bScore.length - aScore.length
+ // this is the ternary version
+ // return aScore.length < bScore.length
+ // ? 1
+ // : aScore.length > bScore.length
+ // ? -1
+ // : 0
}
--- /dev/null
// Kinds of tokens produced by tokenizePath.
export const enum TokenType {
  Static,
  Param,
  Group,
}

// Internal states of the tokenizer's state machine.
const enum TokenizerState {
  Static,
  Param,
  ParamRegExp, // custom re for a param
  ParamRegExpEnd, // check if there is any ? + *
  EscapeNext,
}
+
// A plain-text section of a path, e.g. "users" in "/users/:id".
interface TokenStatic {
  type: TokenType.Static
  value: string
}

// A dynamic param section, e.g. ":id" in "/users/:id".
interface TokenParam {
  type: TokenType.Param
  // custom regexp source provided in parentheses, e.g. ":id(\\d+)"
  regexp?: string
  value: string
  // true when the param is followed by ? or *
  optional: boolean
  // true when the param is followed by + or *
  repeatable: boolean
}

// A group of tokens (the "{...}" syntax — handling is still a TODO in the tokenizer).
interface TokenGroup {
  type: TokenType.Group
  value: Exclude<Token, TokenGroup>[]
}

export type Token = TokenStatic | TokenParam | TokenGroup

// Token used for the root path "/"
const ROOT_TOKEN: Token = {
  type: TokenType.Static,
  value: '',
}

// Characters allowed in a param name
const VALID_PARAM_RE = /[a-zA-Z0-9_]/
+
+export function tokenizePath(path: string): Array<Token[]> {
+ if (!path) return [[]]
+ if (path === '/') return [[ROOT_TOKEN]]
+ // remove the leading slash
+ if (path[0] !== '/') throw new Error('A non-empty path must start with "/"')
+
+ function crash(message: string) {
+ throw new Error(`ERR (${state})/"${buffer}": ${message}`)
+ }
+
+ let state: TokenizerState = TokenizerState.Static
+ let previousState: TokenizerState = state
+ const tokens: Array<Token[]> = []
+ // the segment will always be valid because we get into the initial state
+ // with the leading /
+ let segment!: Token[]
+
+ function finalizeSegment() {
+ if (segment) tokens.push(segment)
+ segment = []
+ }
+
+ // index on the path
+ let i = 0
+ // char at index
+ let char: string
+ // buffer of the value read
+ let buffer: string = ''
+ // custom regexp for a param
+ let customRe: string = ''
+
+ function consumeBuffer() {
+ if (!buffer) return
+
+ if (state === TokenizerState.Static) {
+ segment.push({
+ type: TokenType.Static,
+ value: buffer,
+ })
+ } else if (
+ state === TokenizerState.Param ||
+ state === TokenizerState.ParamRegExp ||
+ state === TokenizerState.ParamRegExpEnd
+ ) {
+ if (segment.length > 1 && (char === '*' || char === '+'))
+ crash(
+ `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.`
+ )
+ segment.push({
+ type: TokenType.Param,
+ value: buffer,
+ regexp: customRe,
+ repeatable: char === '*' || char === '+',
+ optional: char === '*' || char === '?',
+ })
+ } else {
+ crash('Invalid state to consume buffer')
+ }
+ buffer = ''
+ }
+
+ function addCharToBuffer() {
+ buffer += char
+ }
+
+ while (i < path.length) {
+ char = path[i++]
+
+ if (char === '\\' && state !== TokenizerState.ParamRegExp) {
+ previousState = state
+ state = TokenizerState.EscapeNext
+ continue
+ }
+
+ switch (state) {
+ case TokenizerState.Static:
+ if (char === '/') {
+ if (buffer) {
+ consumeBuffer()
+ }
+ finalizeSegment()
+ } else if (char === ':') {
+ consumeBuffer()
+ state = TokenizerState.Param
+ } else if (char === '{') {
+ // TODO: handle group
+ addCharToBuffer()
+ } else {
+ addCharToBuffer()
+ }
+ break
+
+ case TokenizerState.EscapeNext:
+ addCharToBuffer()
+ state = previousState
+ break
+
+ case TokenizerState.Param:
+ if (char === '(') {
+ state = TokenizerState.ParamRegExp
+ customRe = ''
+ } else if (VALID_PARAM_RE.test(char)) {
+ addCharToBuffer()
+ } else {
+ consumeBuffer()
+ state = TokenizerState.Static
+ // go back one character if we were not modifying
+ if (char !== '*' && char !== '?' && char !== '+') i--
+ }
+ break
+
+ case TokenizerState.ParamRegExp:
+ if (char === ')') {
+ // handle the escaped )
+ if (customRe[customRe.length - 1] == '\\')
+ customRe = customRe.slice(0, -1) + char
+ else state = TokenizerState.ParamRegExpEnd
+ } else {
+ customRe += char
+ }
+ break
+
+ case TokenizerState.ParamRegExpEnd:
+ // same as finalizing a param
+ consumeBuffer()
+ state = TokenizerState.Static
+ // go back one character if we were not modifying
+ if (char !== '*' && char !== '?' && char !== '+') i--
+ break
+
+ default:
+ crash('Unkwnonw state')
+ break
+ }
+ }
+
+ if (state === TokenizerState.ParamRegExp)
+ crash(`Unfinished custom RegExp for param "${buffer}"`)
+
+ consumeBuffer()
+ finalizeSegment()
+
+ return tokens
+}