]>
Commit | Line | Data |
---|---|---|
277fe616 | 1 | /* Find near-matches for strings and identifiers. |
99dee823 | 2 | Copyright (C) 2015-2021 Free Software Foundation, Inc. |
277fe616 DM |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #ifndef GCC_SPELLCHECK_H | |
21 | #define GCC_SPELLCHECK_H | |
22 | ||
23 | typedef unsigned int edit_distance_t; | |
24 | const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; | |
25 | ||
8ece8dfb | 26 | /* spellcheck.c */ |
93ebf1fd | 27 | extern edit_distance_t |
b80a188b DM |
28 | get_edit_distance (const char *s, int len_s, |
29 | const char *t, int len_t); | |
93ebf1fd | 30 | |
277fe616 | 31 | extern edit_distance_t |
b80a188b | 32 | get_edit_distance (const char *s, const char *t); |
277fe616 | 33 | |
61789eed DM |
34 | extern const char * |
35 | find_closest_string (const char *target, | |
36 | const auto_vec<const char *> *candidates); | |
37 | ||
6a3f203c DM |
38 | /* A traits class for describing a string-like type usable by |
39 | class best_match. | |
40 | Specializations should provide the implementations of the following: | |
8ece8dfb | 41 | |
6a3f203c DM |
42 | static size_t get_length (TYPE); |
43 | static const char *get_string (TYPE); | |
44 | ||
45 | get_string should return a non-NULL ptr, which does not need to be | |
46 | 0-terminated. */ | |
47 | ||
48 | template <typename TYPE> | |
49 | struct edit_distance_traits {}; | |
50 | ||
f4452176 DM |
51 | /* Specialization of edit_distance_traits for C-style strings. */ |
52 | ||
53 | template <> | |
54 | struct edit_distance_traits<const char *> | |
55 | { | |
56 | static size_t get_length (const char *str) | |
57 | { | |
58 | gcc_assert (str); | |
59 | return strlen (str); | |
60 | } | |
61 | ||
62 | static const char *get_string (const char *str) | |
63 | { | |
64 | gcc_assert (str); | |
65 | return str; | |
66 | } | |
67 | }; | |
68 | ||
07f87905 DM |
69 | extern edit_distance_t get_edit_distance_cutoff (size_t goal_len, |
70 | size_t candidate_len); | |
71 | ||
6a3f203c DM |
72 | /* A type for use when determining the best match against a string, |
73 | expressed as a template so that we can match against various | |
74 | string-like types (const char *, frontend identifiers, and preprocessor | |
75 | macros). | |
76 | ||
77 | This type accumulates the best possible match against GOAL_TYPE for | |
78 | a sequence of elements of CANDIDATE_TYPE, whilst minimizing the | |
b80a188b | 79 | number of calls to get_edit_distance and to |
6a3f203c DM |
80 | edit_distance_traits<T>::get_length. */ |
81 | ||
82 | template <typename GOAL_TYPE, typename CANDIDATE_TYPE> | |
83 | class best_match | |
84 | { | |
85 | public: | |
86 | typedef GOAL_TYPE goal_t; | |
87 | typedef CANDIDATE_TYPE candidate_t; | |
88 | typedef edit_distance_traits<goal_t> goal_traits; | |
89 | typedef edit_distance_traits<candidate_t> candidate_traits; | |
90 | ||
91 | /* Constructor. */ | |
92 | ||
1a4f11c8 DM |
93 | best_match (GOAL_TYPE goal, |
94 | edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) | |
6a3f203c DM |
95 | : m_goal (goal_traits::get_string (goal)), |
96 | m_goal_len (goal_traits::get_length (goal)), | |
97 | m_best_candidate (NULL), | |
1a4f11c8 | 98 | m_best_distance (best_distance_so_far) |
6a3f203c DM |
99 | {} |
100 | ||
101 | /* Compare the edit distance between CANDIDATE and m_goal, | |
102 | and if it's the best so far, record it. */ | |
103 | ||
104 | void consider (candidate_t candidate) | |
105 | { | |
106 | size_t candidate_len = candidate_traits::get_length (candidate); | |
107 | ||
108 | /* Calculate a lower bound on the candidate's distance to the goal, | |
109 | based on the difference in lengths; it will require at least | |
110 | this many insertions/deletions. */ | |
111 | edit_distance_t min_candidate_distance | |
112 | = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); | |
113 | ||
114 | /* If the candidate's length is sufficiently different to that | |
115 | of the goal string, then the number of insertions/deletions | |
116 | may be >= the best distance so far. If so, we can reject | |
117 | the candidate immediately without needing to compute | |
118 | the exact distance, since it won't be an improvement. */ | |
119 | if (min_candidate_distance >= m_best_distance) | |
120 | return; | |
121 | ||
122 | /* If the candidate will be unable to beat the criterion in | |
123 | get_best_meaningful_candidate, reject it without computing | |
124 | the exact distance. */ | |
07f87905 | 125 | edit_distance_t cutoff = get_cutoff (candidate_len); |
6a3f203c DM |
126 | if (min_candidate_distance > cutoff) |
127 | return; | |
128 | ||
129 | /* Otherwise, compute the distance and see if the candidate | |
130 | has beaten the previous best value. */ | |
131 | edit_distance_t dist | |
b80a188b DM |
132 | = get_edit_distance (m_goal, m_goal_len, |
133 | candidate_traits::get_string (candidate), | |
134 | candidate_len); | |
6a3f203c DM |
135 | if (dist < m_best_distance) |
136 | { | |
137 | m_best_distance = dist; | |
138 | m_best_candidate = candidate; | |
139 | m_best_candidate_len = candidate_len; | |
140 | } | |
141 | } | |
142 | ||
1a4f11c8 DM |
143 | /* Assuming that BEST_CANDIDATE is known to be better than |
144 | m_best_candidate, update (without recomputing the edit distance to | |
145 | the goal). */ | |
146 | ||
147 | void set_best_so_far (CANDIDATE_TYPE best_candidate, | |
148 | edit_distance_t best_distance, | |
149 | size_t best_candidate_len) | |
150 | { | |
151 | gcc_assert (best_distance < m_best_distance); | |
152 | m_best_candidate = best_candidate; | |
153 | m_best_distance = best_distance; | |
154 | m_best_candidate_len = best_candidate_len; | |
155 | } | |
156 | ||
07f87905 DM |
157 | /* Generate the maximum edit distance for which we consider a suggestion |
158 | to be meaningful, given a candidate of length CANDIDATE_LEN. */ | |
159 | ||
160 | edit_distance_t get_cutoff (size_t candidate_len) const | |
161 | { | |
162 | return ::get_edit_distance_cutoff (m_goal_len, candidate_len); | |
163 | } | |
164 | ||
6a3f203c DM |
165 | /* Get the best candidate so far, but applying a filter to ensure |
166 | that we return NULL if none of the candidates are close to the goal, | |
167 | to avoid offering nonsensical suggestions to the user. */ | |
168 | ||
169 | candidate_t get_best_meaningful_candidate () const | |
170 | { | |
07f87905 DM |
171 | /* If the edit distance is too high, the suggestion is likely to be |
172 | meaningless. */ | |
6a3f203c DM |
173 | if (m_best_candidate) |
174 | { | |
07f87905 | 175 | edit_distance_t cutoff = get_cutoff (m_best_candidate_len); |
6a3f203c DM |
176 | if (m_best_distance > cutoff) |
177 | return NULL; | |
178 | } | |
8bf3cdff DM |
179 | |
180 | /* If the goal string somehow makes it into the candidate list, offering | |
181 | it as a suggestion will be nonsensical e.g. | |
182 | 'constexpr' does not name a type; did you mean 'constexpr'? | |
183 | Ultimately such suggestions are due to bugs in constructing the | |
184 | candidate list, but as a band-aid, do not offer suggestions for | |
185 | distance == 0 (where candidate == goal). */ | |
186 | if (m_best_distance == 0) | |
187 | return NULL; | |
188 | ||
6a3f203c DM |
189 | return m_best_candidate; |
190 | } | |
277fe616 | 191 | |
01ada121 DM |
192 | /* Get the closest candidate so far, without applying any filtering. */ |
193 | ||
194 | candidate_t blithely_get_best_candidate () const | |
195 | { | |
196 | return m_best_candidate; | |
197 | } | |
198 | ||
1a4f11c8 DM |
199 | edit_distance_t get_best_distance () const { return m_best_distance; } |
200 | size_t get_best_candidate_length () const { return m_best_candidate_len; } | |
201 | ||
6a3f203c DM |
202 | private: |
203 | const char *m_goal; | |
204 | size_t m_goal_len; | |
205 | candidate_t m_best_candidate; | |
206 | edit_distance_t m_best_distance; | |
207 | size_t m_best_candidate_len; | |
208 | }; | |
8ece8dfb | 209 | |
277fe616 | 210 | #endif /* GCC_SPELLCHECK_H */ |