]>
Commit | Line | Data |
---|---|---|
b4c522fa | 1 | |
a3b38b77 | 2 | /* Copyright (C) 2010-2021 by The D Language Foundation, All Rights Reserved |
b4c522fa IB |
3 | * http://www.digitalmars.com |
4 | * Distributed under the Boost Software License, Version 1.0. | |
5 | * (See accompanying file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | * https://github.com/D-Programming-Language/dmd/blob/master/src/root/speller.c | |
7 | */ | |
8 | ||
f9ab59ff | 9 | #include "dsystem.h" |
b4c522fa IB |
10 | #include "speller.h" |
11 | ||
12 | const char idchars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; | |
13 | ||
14 | /************************************************** | |
15 | * combine a new result from the spell checker to | |
16 | * find the one with the closest symbol with | |
17 | * respect to the cost defined by the search function | |
18 | * Input/Output: | |
19 | * p best found spelling (NULL if none found yet) | |
20 | * cost cost of p (INT_MAX if none found yet) | |
21 | * Input: | |
22 | * np new found spelling (NULL if none found) | |
23 | * ncost cost of np if non-NULL | |
24 | * Returns: | |
25 | * true if the cost is less or equal 0 | |
26 | * false otherwise | |
27 | */ | |
28 | bool combineSpellerResult(void*& p, int& cost, void* np, int ncost) | |
29 | { | |
30 | if (np && ncost < cost) | |
31 | { | |
32 | p = np; | |
33 | cost = ncost; | |
34 | if (cost <= 0) | |
35 | return true; | |
36 | } | |
37 | return false; | |
38 | } | |
39 | ||
40 | void *spellerY(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg, | |
41 | const char *charset, size_t index, int* cost) | |
42 | { | |
43 | if (!seedlen) | |
44 | return NULL; | |
45 | assert(seed[seedlen] == 0); | |
46 | ||
47 | char tmp[30]; | |
48 | char *buf; | |
49 | if (seedlen <= sizeof(tmp) - 2) | |
50 | buf = tmp; | |
51 | else | |
52 | { | |
53 | buf = (char *)alloca(seedlen + 2); // leave space for extra char | |
54 | if (!buf) | |
55 | return NULL; // no matches | |
56 | } | |
57 | ||
58 | memcpy(buf, seed, index); | |
59 | *cost = INT_MAX; | |
60 | void* p = NULL; | |
ace4b1ac | 61 | int ncost = 0; |
b4c522fa IB |
62 | |
63 | /* Delete at seed[index] */ | |
64 | if (index < seedlen) | |
65 | { | |
66 | memcpy(buf + index, seed + index + 1, seedlen - index); | |
67 | assert(buf[seedlen - 1] == 0); | |
68 | void* np = (*fp)(fparg, buf, &ncost); | |
69 | if (combineSpellerResult(p, *cost, np, ncost)) | |
70 | return p; | |
71 | } | |
72 | ||
73 | if (charset && *charset) | |
74 | { | |
75 | /* Substitutions */ | |
76 | if (index < seedlen) | |
77 | { | |
78 | memcpy(buf, seed, seedlen + 1); | |
79 | for (const char *s = charset; *s; s++) | |
80 | { | |
81 | buf[index] = *s; | |
82 | ||
83 | //printf("sub buf = '%s'\n", buf); | |
84 | void* np = (*fp)(fparg, buf, &ncost); | |
85 | if (combineSpellerResult(p, *cost, np, ncost)) | |
86 | return p; | |
87 | } | |
88 | assert(buf[seedlen] == 0); | |
89 | } | |
90 | ||
91 | /* Insertions */ | |
92 | memcpy (buf + index + 1, seed + index, seedlen + 1 - index); | |
93 | ||
94 | for (const char *s = charset; *s; s++) | |
95 | { | |
96 | buf[index] = *s; | |
97 | ||
98 | //printf("ins buf = '%s'\n", buf); | |
99 | void* np = (*fp)(fparg, buf, &ncost); | |
100 | if (combineSpellerResult(p, *cost, np, ncost)) | |
101 | return p; | |
102 | } | |
103 | assert(buf[seedlen + 1] == 0); | |
104 | } | |
105 | ||
106 | return p; // return "best" result | |
107 | } | |
108 | ||
109 | void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg, | |
110 | const char *charset, int flag) | |
111 | { | |
112 | if (!seedlen) | |
113 | return NULL; | |
114 | ||
115 | char tmp[30]; | |
116 | char *buf; | |
117 | if (seedlen <= sizeof(tmp) - 2) | |
118 | buf = tmp; | |
119 | else | |
120 | { | |
121 | buf = (char *)alloca(seedlen + 2); // leave space for extra char | |
122 | if (!buf) | |
123 | return NULL; // no matches | |
124 | } | |
ace4b1ac | 125 | int cost = INT_MAX, ncost = 0; |
b4c522fa IB |
126 | void *p = NULL, *np; |
127 | ||
128 | /* Deletions */ | |
129 | memcpy(buf, seed + 1, seedlen); | |
130 | for (size_t i = 0; i < seedlen; i++) | |
131 | { | |
132 | //printf("del buf = '%s'\n", buf); | |
133 | if (flag) | |
134 | np = spellerY(buf, seedlen - 1, fp, fparg, charset, i, &ncost); | |
135 | else | |
136 | np = (*fp)(fparg, buf, &ncost); | |
137 | if (combineSpellerResult(p, cost, np, ncost)) | |
138 | return p; | |
139 | ||
140 | buf[i] = seed[i]; | |
141 | } | |
142 | ||
143 | /* Transpositions */ | |
144 | if (!flag) | |
145 | { | |
146 | memcpy(buf, seed, seedlen + 1); | |
147 | for (size_t i = 0; i + 1 < seedlen; i++) | |
148 | { | |
149 | // swap [i] and [i + 1] | |
150 | buf[i] = seed[i + 1]; | |
151 | buf[i + 1] = seed[i]; | |
152 | ||
153 | //printf("tra buf = '%s'\n", buf); | |
154 | if (combineSpellerResult(p, cost, (*fp)(fparg, buf, &ncost), ncost)) | |
155 | return p; | |
156 | ||
157 | buf[i] = seed[i]; | |
158 | } | |
159 | } | |
160 | ||
161 | if (charset && *charset) | |
162 | { | |
163 | /* Substitutions */ | |
164 | memcpy(buf, seed, seedlen + 1); | |
165 | for (size_t i = 0; i < seedlen; i++) | |
166 | { | |
167 | for (const char *s = charset; *s; s++) | |
168 | { | |
169 | buf[i] = *s; | |
170 | ||
171 | //printf("sub buf = '%s'\n", buf); | |
172 | if (flag) | |
173 | np = spellerY(buf, seedlen, fp, fparg, charset, i + 1, &ncost); | |
174 | else | |
175 | np = (*fp)(fparg, buf, &ncost); | |
176 | if (combineSpellerResult(p, cost, np, ncost)) | |
177 | return p; | |
178 | } | |
179 | buf[i] = seed[i]; | |
180 | } | |
181 | ||
182 | /* Insertions */ | |
183 | memcpy(buf + 1, seed, seedlen + 1); | |
184 | for (size_t i = 0; i <= seedlen; i++) // yes, do seedlen+1 iterations | |
185 | { | |
186 | for (const char *s = charset; *s; s++) | |
187 | { | |
188 | buf[i] = *s; | |
189 | ||
190 | //printf("ins buf = '%s'\n", buf); | |
191 | if (flag) | |
192 | np = spellerY(buf, seedlen + 1, fp, fparg, charset, i + 1, &ncost); | |
193 | else | |
194 | np = (*fp)(fparg, buf, &ncost); | |
195 | if (combineSpellerResult(p, cost, np, ncost)) | |
196 | return p; | |
197 | } | |
198 | buf[i] = seed[i]; // going past end of seed[] is ok, as we hit the 0 | |
199 | } | |
200 | } | |
201 | ||
202 | return p; // return "best" result | |
203 | } | |
204 | ||
205 | /************************************************** | |
206 | * Looks for correct spelling. | |
207 | * Currently only looks a 'distance' of one from the seed[]. | |
208 | * This does an exhaustive search, so can potentially be very slow. | |
209 | * Input: | |
210 | * seed wrongly spelled word | |
211 | * fp search function | |
212 | * fparg argument to search function | |
213 | * charset character set | |
214 | * Returns: | |
215 | * NULL no correct spellings found | |
216 | * void* value returned by fp() for first possible correct spelling | |
217 | */ | |
218 | ||
219 | void *speller(const char *seed, fp_speller_t fp, void *fparg, const char *charset) | |
220 | { | |
221 | size_t seedlen = strlen(seed); | |
222 | size_t maxdist = seedlen < 4 ? seedlen / 2 : 2; | |
223 | for (size_t distance = 0; distance < maxdist; distance++) | |
224 | { void *p = spellerX(seed, seedlen, fp, fparg, charset, distance); | |
225 | if (p) | |
226 | return p; | |
227 | // if (seedlen > 10) | |
228 | // break; | |
229 | } | |
230 | return NULL; // didn't find it | |
231 | } |