refactor(parser): rename files and add docs

author Eduardo San Martin Morote <posva13@gmail.com>

Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)

committer Eduardo San Martin Morote <posva13@gmail.com>

Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)
author Eduardo San Martin Morote <posva13@gmail.com>
Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)
committer Eduardo San Martin Morote <posva13@gmail.com>
Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)
diff --git a/__tests__/matcher/path-parser.spec.ts b/__tests__/matcher/path-parser.spec.ts

index 2afdb68245dc566b84fe42f7ada7af4cfab5cc0b..dbcf90e69d0eba8e8896698d4d7b0da39e6a726a 100644 (file)
--- a/__tests__/matcher/path-parser.spec.ts
+++ b/__tests__/matcher/path-parser.spec.ts
@@ -1,8 +1,5 @@
-import {
-  tokenizePath,
-  TokenType,
-  tokensToParser,
-} from '../../src/matcher/tokenizer'
+import { tokenizePath, TokenType } from '../../src/matcher/path-tokenizer'
+import { tokensToParser } from '../../src/matcher/path-parser-ranker'
  
  describe('Path parser', () => {
    describe('tokenizer', () => {
@@ -28,6 +25,8 @@ describe('Path parser', () => {
        ])
      })
  
+    // not sure how useful this is and if it's worth supporting because of the
+    // cost to support the ranking as well
      it.skip('groups', () => {
        expect(tokenizePath('/one{-b_:id}')).toEqual([
          [
@@ -43,7 +42,7 @@ describe('Path parser', () => {
        ])
      })
  
-    // TODO: add test when groups exist
+    // same as above
      it.skip('escapes } inside group', () => {
        expect(tokenizePath('/{\\{}')).toEqual([
          [{ type: TokenType.Static, value: '{' }],
diff --git a/__tests__/matcher/path-ranking.spec.ts b/__tests__/matcher/path-ranking.spec.ts

index b8eddcdafee0423a17381b82e487bd6a7f1c394e..cc5f91adfe4f4c7d4a9cfbb2389c5b0c3a7287d7 100644 (file)
--- a/__tests__/matcher/path-ranking.spec.ts
+++ b/__tests__/matcher/path-ranking.spec.ts
@@ -1,8 +1,8 @@
+import { tokenizePath } from '../../src/matcher/path-tokenizer'
  import {
    tokensToParser,
-  tokenizePath,
    comparePathParserScore,
-} from '../../src/matcher/tokenizer'
+} from '../../src/matcher/path-parser-ranker'
  
  type PathParserOptions = Parameters<typeof tokensToParser>[1]
  
diff --git a/src/matcher/tokenizer.ts b/src/matcher/path-parser-ranker.ts

similarity index 53%

rename from src/matcher/tokenizer.ts

rename to src/matcher/path-parser-ranker.ts

index 91461db98dd9a7f961e67ef8b316123771c0d04b..bdb343d9daf39ecd336eb5c30caf2a90db71b749 100644 (file)
--- a/src/matcher/tokenizer.ts
+++ b/src/matcher/path-parser-ranker.ts
@@ -1,202 +1,43 @@
-export const enum TokenType {
-  Static,
-  Param,
-  Group,
-}
-
-const enum TokenizerState {
-  Static,
-  Param,
-  ParamRegExp, // custom re for a param
-  ParamRegExpEnd, // check if there is any ? + *
-  EscapeNext,
-}
-
-interface TokenStatic {
-  type: TokenType.Static
-  value: string
-}
-
-interface TokenParam {
-  type: TokenType.Param
-  regexp?: string
-  value: string
-  optional: boolean
-  repeatable: boolean
-}
-
-interface TokenGroup {
-  type: TokenType.Group
-  value: Exclude<Token, TokenGroup>[]
-}
-
-type Token = TokenStatic | TokenParam | TokenGroup
-
-const ROOT_TOKEN: Token = {
-  type: TokenType.Static,
-  value: '',
-}
-
-const VALID_PARAM_RE = /[a-zA-Z0-9_]/
-
-export function tokenizePath(path: string): Array<Token[]> {
-  if (!path) return [[]]
-  if (path === '/') return [[ROOT_TOKEN]]
-  // remove the leading slash
-  if (path[0] !== '/') throw new Error('A non-empty path must start with "/"')
-
-  function crash(message: string) {
-    throw new Error(`ERR (${state})/"${buffer}": ${message}`)
-  }
-
-  let state: TokenizerState = TokenizerState.Static
-  let previousState: TokenizerState = state
-  const tokens: Array<Token[]> = []
-  // the segment will always be valid because we get into the initial state
-  // with the leading /
-  let segment!: Token[]
-
-  function finalizeSegment() {
-    if (segment) tokens.push(segment)
-    segment = []
-  }
-
-  // index on the path
-  let i = 0
-  // char at index
-  let char: string
-  // buffer of the value read
-  let buffer: string = ''
-  // custom regexp for a param
-  let customRe: string = ''
-
-  function consumeBuffer() {
-    if (!buffer) return
-
-    if (state === TokenizerState.Static) {
-      segment.push({
-        type: TokenType.Static,
-        value: buffer,
-      })
-    } else if (
-      state === TokenizerState.Param ||
-      state === TokenizerState.ParamRegExp ||
-      state === TokenizerState.ParamRegExpEnd
-    ) {
-      if (segment.length > 1 && (char === '*' || char === '+'))
-        crash(
-          `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.`
-        )
-      segment.push({
-        type: TokenType.Param,
-        value: buffer,
-        regexp: customRe,
-        repeatable: char === '*' || char === '+',
-        optional: char === '*' || char === '?',
-      })
-    } else {
-      crash('Invalid state to consume buffer')
-    }
-    buffer = ''
-  }
-
-  function addCharToBuffer() {
-    buffer += char
-  }
-
-  while (i < path.length) {
-    char = path[i++]
-
-    if (char === '\\' && state !== TokenizerState.ParamRegExp) {
-      previousState = state
-      state = TokenizerState.EscapeNext
-      continue
-    }
-
-    switch (state) {
-      case TokenizerState.Static:
-        if (char === '/') {
-          if (buffer) {
-            consumeBuffer()
-          }
-          finalizeSegment()
-        } else if (char === ':') {
-          consumeBuffer()
-          state = TokenizerState.Param
-        } else if (char === '{') {
-          // TODO: handle group
-          addCharToBuffer()
-        } else {
-          addCharToBuffer()
-        }
-        break
-
-      case TokenizerState.EscapeNext:
-        addCharToBuffer()
-        state = previousState
-        break
-
-      case TokenizerState.Param:
-        if (char === '(') {
-          state = TokenizerState.ParamRegExp
-          customRe = ''
-        } else if (VALID_PARAM_RE.test(char)) {
-          addCharToBuffer()
-        } else {
-          consumeBuffer()
-          state = TokenizerState.Static
-          // go back one character if we were not modifying
-          if (char !== '*' && char !== '?' && char !== '+') i--
-        }
-        break
-
-      case TokenizerState.ParamRegExp:
-        if (char === ')') {
-          // handle the escaped )
-          if (customRe[customRe.length - 1] == '\\')
-            customRe = customRe.slice(0, -1) + char
-          else state = TokenizerState.ParamRegExpEnd
-        } else {
-          customRe += char
-        }
-        break
-
-      case TokenizerState.ParamRegExpEnd:
-        // same as finalizing a param
-        consumeBuffer()
-        state = TokenizerState.Static
-        // go back one character if we were not modifying
-        if (char !== '*' && char !== '?' && char !== '+') i--
-        break
-
-      default:
-        crash('Unkwnonw state')
-        break
-    }
-  }
-
-  if (state === TokenizerState.ParamRegExp)
-    crash(`Unfinished custom RegExp for param "${buffer}"`)
-
-  consumeBuffer()
-  finalizeSegment()
-
-  return tokens
-}
+import { Token, TokenType } from './path-tokenizer'
  
  type Params = Record<string, string | string[]>
  
-interface ParamKey {
+/**
+ * @description A param key found in a path, with its name and modifiers
+ */
+interface PathParserParamKey {
    name: string
    repeatable: boolean
    optional: boolean
  }
  
  export interface PathParser {
+  /**
+   * The regexp used to match a url
+   */
    re: RegExp
+  /**
+   * The score of the parser
+   */
    score: Array<number[]>
-  keys: ParamKey[]
+  /**
+   * Keys that appeared in the path
+   */
+  keys: PathParserParamKey[]
+  /**
+   * Parses a url and returns the matched params or null if it doesn't match. An
+   * optional param that isn't present will be an empty string. A repeatable
+   * param will be an array if there is at least one value.
+   * @param path url to parse
+   * @returns a Params object, empty if there are no params. `null` if there is
+   * no match
+   */
    parse(path: string): Params | null
+  /**
+   * Creates a string version of the url
+   * @param params object of params
+   * @returns a url
+   */
    stringify(params: Params): string
  }
  
@@ -227,6 +68,7 @@ interface PathParserOptions {
    decode?: (value: string) => string
  }
  
+// default pattern for a param: non greedy everything but /
  const BASE_PARAM_PATTERN = '[^/]+?'
  
  const BASE_PATH_PARSER_OPTIONS: Required<PathParserOptions> = {
@@ -239,6 +81,7 @@ const BASE_PATH_PARSER_OPTIONS: Required<PathParserOptions> = {
    decode: v => v,
  }
  
+// Scoring values used in tokensToParser
  const enum PathScore {
    _multiplier = 10,
    Root = 9 * _multiplier, // just /
@@ -260,6 +103,7 @@ const enum PathScore {
   *
   * @param segments array of segments returned by tokenizePath
   * @param extraOptions optional options for the regexp
+ * @returns a PathParser
   */
  export function tokensToParser(
    segments: Array<Token[]>,
@@ -270,15 +114,15 @@ export function tokensToParser(
      ...extraOptions,
    }
  
-  // the amount of scores is the same as the length of segments
+  // the amount of scores is the same as the length of segments except for the root segment "/"
    let score: Array<number[]> = []
+  // the regexp as a string
    let pattern = options.start ? '^' : ''
-  const keys: ParamKey[] = []
+  // extracted keys
+  const keys: PathParserParamKey[] = []
  
    for (const segment of segments) {
-    // allow an empty path to be different from slash
-    // if (!segment.length) pattern += '/'
-
+    // the root segment needs special treatment
      const segmentScores: number[] = segment.length ? [] : [PathScore.Root]
  
      for (let tokenIndex = 0; tokenIndex < segment.length; tokenIndex++) {
@@ -297,26 +141,31 @@ export function tokensToParser(
          const { value, repeatable, optional, regexp } = token
          keys.push({
            name: value,
-          repeatable: repeatable,
-          optional: optional,
+          repeatable,
+          optional,
          })
          const re = regexp ? regexp : BASE_PARAM_PATTERN
+        // the user provided a custom regexp /:id(\\d+)
          if (re !== BASE_PARAM_PATTERN) {
            subSegmentScore += PathScore.BonusCustomRegExp
+          // make sure the regexp is valid before using it
            try {
              new RegExp(`(${re})`)
            } catch (err) {
              throw new Error(
-              `Invalid custom RegExp for param "${value}": ` + err.message
+              `Invalid custom RegExp for param "${value}" (${re}): ` +
+                err.message
              )
            }
          }
-        // (?:\/((?:${re})(?:\/(?:${re}))*))
+
+        // when we repeat we must take care of the repeating leading slash
          let subPattern = repeatable ? `((?:${re})(?:/(?:${re}))*)` : `(${re})`
  
+        // prepend the slash if we are starting a new segment
          if (!tokenIndex)
-          subPattern = optional ? `(?:/${subPattern})?` : '/' + subPattern
-        else subPattern += optional ? '?' : ''
+          subPattern = optional ? `(?:/${subPattern})` : '/' + subPattern
+        if (optional) subPattern += '?'
  
          pattern += subPattern
  
@@ -362,7 +211,8 @@ export function tokensToParser(
  
    function stringify(params: Params): string {
      let path = ''
-    let avoidDuplicatedSlash = false
+    // set when an optional param is empty so the next segment skips a duplicated slash
+    let avoidDuplicatedSlash: boolean = false
      for (const segment of segments) {
        if (!avoidDuplicatedSlash || path[path.length - 1] !== '/') path += '/'
        avoidDuplicatedSlash = false
@@ -378,10 +228,12 @@ export function tokensToParser(
              throw new Error(
                `Provided param "${value}" is an array but it is not repeatable (* or + modifiers)`
              )
+          // TODO: encode, decode values, make sure that wildcard regexps do not encode the `/`
            const text: string = Array.isArray(param) ? param.join('/') : param
            if (!text) {
-            if (!optional) throw new Error(`Missing required param "${value}"`)
-            else avoidDuplicatedSlash = true
+            // do not append a slash on the next iteration
+            if (optional) avoidDuplicatedSlash = true
+            else throw new Error(`Missing required param "${value}"`)
            }
            path += text
          }
@@ -400,16 +252,26 @@ export function tokensToParser(
    }
  }
  
-export function compareScoreArray(a: number[], b: number[]): number {
+/**
+ * Compares an array of numbers as used in PathParser.score and returns a
+ * number. This function can be used to `sort` an array
+ * @param a first array of numbers
+ * @param b second array of numbers
+ * @returns 0 if both are equal, < 0 if a should be sorted first, > 0 if b
+ * should be sorted first
+ */
+function compareScoreArray(a: number[], b: number[]): number {
    let i = 0
    while (i < a.length && i < b.length) {
-    if (a[i] < b[i]) return 1
-    if (a[i] > b[i]) return -1
+    const diff = b[i] - a[i]
+    // only keep going if diff === 0
+    if (diff) return diff
  
      i++
    }
  
-  // if the last subsegment was Static, the shorter
+  // if the last subsegment was Static, the shorter segments should be sorted first
+  // otherwise sort the longest segment first
    if (a.length < b.length) {
      return a.length === 1 && a[0] === PathScore.Static + PathScore.Segment
        ? -1
@@ -423,6 +285,12 @@ export function compareScoreArray(a: number[], b: number[]): number {
    return 0
  }
  
+/**
+ * Compare function that can be used with `sort` to sort an array of PathParser
+ * @param a first PathParser
+ * @param b second PathParser
+ * @returns 0 if both are equal, < 0 if a sorts first, > 0 if b sorts first
+ */
  export function comparePathParserScore(a: PathParser, b: PathParser): number {
    let i = 0
    const aScore = a.score
@@ -435,13 +303,12 @@ export function comparePathParserScore(a: PathParser, b: PathParser): number {
      i++
    }
  
-  // TODO: one is this way the other the opposite it's more complicated than
-  // that because with subsegments the length matters while with segment it
-  // doesnt (1 vs 1+). So I need to treat the first entry of each array
-  // differently
-  return aScore.length < bScore.length
-    ? 1
-    : aScore.length > bScore.length
-    ? -1
-    : 0
+  // if a and b share the same score entries but b has more, sort b first
+  return bScore.length - aScore.length
+  // this is the ternary version
+  // return aScore.length < bScore.length
+  //   ? 1
+  //   : aScore.length > bScore.length
+  //   ? -1
+  //   : 0
  }
diff --git a/src/matcher/path-ranker.ts b/src/matcher/path-ranker.ts

deleted file mode 100644 (file)

index ca6ccf0..0000000
--- a/src/matcher/path-ranker.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-export function comparePathParserScore(a: number[], b: number[]): number {
-  let i = 0
-  while (i < a.length && i < b.length) {
-    if (a[i] < b[i]) return 1
-    if (a[i] > b[i]) return -1
-
-    i++
-  }
-
-  return a.length < b.length ? 1 : a.length > b.length ? -1 : 0
-}
diff --git a/src/matcher/path-tokenizer.ts b/src/matcher/path-tokenizer.ts

new file mode 100644 (file)

index 0000000..d68bad3
--- /dev/null
+++ b/src/matcher/path-tokenizer.ts
@@ -0,0 +1,185 @@
+export const enum TokenType {
+  Static,
+  Param,
+  Group,
+}
+
+const enum TokenizerState {
+  Static,
+  Param,
+  ParamRegExp, // custom re for a param
+  ParamRegExpEnd, // check if there is any ? + *
+  EscapeNext,
+}
+
+interface TokenStatic {
+  type: TokenType.Static
+  value: string
+}
+
+interface TokenParam {
+  type: TokenType.Param
+  regexp?: string
+  value: string
+  optional: boolean
+  repeatable: boolean
+}
+
+interface TokenGroup {
+  type: TokenType.Group
+  value: Exclude<Token, TokenGroup>[]
+}
+
+export type Token = TokenStatic | TokenParam | TokenGroup
+
+const ROOT_TOKEN: Token = {
+  type: TokenType.Static,
+  value: '',
+}
+
+const VALID_PARAM_RE = /[a-zA-Z0-9_]/
+
+export function tokenizePath(path: string): Array<Token[]> {
+  if (!path) return [[]]
+  if (path === '/') return [[ROOT_TOKEN]]
+  // a non-empty path must start with a leading slash
+  if (path[0] !== '/') throw new Error('A non-empty path must start with "/"')
+
+  function crash(message: string) {
+    throw new Error(`ERR (${state})/"${buffer}": ${message}`)
+  }
+
+  let state: TokenizerState = TokenizerState.Static
+  let previousState: TokenizerState = state
+  const tokens: Array<Token[]> = []
+  // the segment will always be valid because we get into the initial state
+  // with the leading /
+  let segment!: Token[]
+
+  function finalizeSegment() {
+    if (segment) tokens.push(segment)
+    segment = []
+  }
+
+  // index on the path
+  let i = 0
+  // char at index
+  let char: string
+  // buffer of the value read
+  let buffer: string = ''
+  // custom regexp for a param
+  let customRe: string = ''
+
+  function consumeBuffer() {
+    if (!buffer) return
+
+    if (state === TokenizerState.Static) {
+      segment.push({
+        type: TokenType.Static,
+        value: buffer,
+      })
+    } else if (
+      state === TokenizerState.Param ||
+      state === TokenizerState.ParamRegExp ||
+      state === TokenizerState.ParamRegExpEnd
+    ) {
+      if (segment.length > 1 && (char === '*' || char === '+'))
+        crash(
+          `A repeatable param (${buffer}) must be alone in its segment. eg: '/:ids+.`
+        )
+      segment.push({
+        type: TokenType.Param,
+        value: buffer,
+        regexp: customRe,
+        repeatable: char === '*' || char === '+',
+        optional: char === '*' || char === '?',
+      })
+    } else {
+      crash('Invalid state to consume buffer')
+    }
+    buffer = ''
+  }
+
+  function addCharToBuffer() {
+    buffer += char
+  }
+
+  while (i < path.length) {
+    char = path[i++]
+
+    if (char === '\\' && state !== TokenizerState.ParamRegExp) {
+      previousState = state
+      state = TokenizerState.EscapeNext
+      continue
+    }
+
+    switch (state) {
+      case TokenizerState.Static:
+        if (char === '/') {
+          if (buffer) {
+            consumeBuffer()
+          }
+          finalizeSegment()
+        } else if (char === ':') {
+          consumeBuffer()
+          state = TokenizerState.Param
+        } else if (char === '{') {
+          // TODO: handle group
+          addCharToBuffer()
+        } else {
+          addCharToBuffer()
+        }
+        break
+
+      case TokenizerState.EscapeNext:
+        addCharToBuffer()
+        state = previousState
+        break
+
+      case TokenizerState.Param:
+        if (char === '(') {
+          state = TokenizerState.ParamRegExp
+          customRe = ''
+        } else if (VALID_PARAM_RE.test(char)) {
+          addCharToBuffer()
+        } else {
+          consumeBuffer()
+          state = TokenizerState.Static
+          // go back one character if we were not modifying
+          if (char !== '*' && char !== '?' && char !== '+') i--
+        }
+        break
+
+      case TokenizerState.ParamRegExp:
+        if (char === ')') {
+          // handle the escaped )
+          if (customRe[customRe.length - 1] == '\\')
+            customRe = customRe.slice(0, -1) + char
+          else state = TokenizerState.ParamRegExpEnd
+        } else {
+          customRe += char
+        }
+        break
+
+      case TokenizerState.ParamRegExpEnd:
+        // same as finalizing a param
+        consumeBuffer()
+        state = TokenizerState.Static
+        // go back one character if we were not modifying
+        if (char !== '*' && char !== '?' && char !== '+') i--
+        break
+
+      default:
+        crash('Unkwnonw state')
+        break
+    }
+  }
+
+  if (state === TokenizerState.ParamRegExp)
+    crash(`Unfinished custom RegExp for param "${buffer}"`)
+
+  consumeBuffer()
+  finalizeSegment()
+
+  return tokens
+}
author	Eduardo San Martin Morote <posva13@gmail.com>
	Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)
committer	Eduardo San Martin Morote <posva13@gmail.com>
	Wed, 18 Dec 2019 10:11:50 +0000 (11:11 +0100)
__tests__/matcher/path-parser.spec.ts		patch \| blob \| blame \| history
__tests__/matcher/path-ranking.spec.ts		patch \| blob \| blame \| history
src/matcher/path-parser-ranker.ts	[moved from src/matcher/tokenizer.ts with 53% similarity]	patch \| blob \| blame \| history
src/matcher/path-ranker.ts	[deleted file]	patch \| blob \| blame \| history
src/matcher/path-tokenizer.ts	[new file with mode: 0644]	patch \| blob