Bug Summary

File:d/url.c
Warning:line 151, column 5
Duplicate code detected
Note:line 411, column 5
Similar code here

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name url.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/isvv/naviserver/nsd -resource-dir /usr/local/lib/clang/15.0.0 -D _FORTIFY_SOURCE=2 -D NDEBUG -D SYSTEM_MALLOC -I ../include -I /usr/include/tcl8.6 -D HAVE_CONFIG_H -internal-isystem /usr/local/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -std=c99 -fdebug-compilation-dir=/home/isvv/naviserver/nsd -ferror-limit 19 -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-checker alpha -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-07-23-130959-11103-1 -x c url.c
1/*
2 * The contents of this file are subject to the Mozilla Public License
3 * Version 1.1 (the "License"); you may not use this file except in
4 * compliance with the License. You may obtain a copy of the License at
5 * http://mozilla.org/.
6 *
7 * Software distributed under the License is distributed on an "AS IS"
8 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
9 * the License for the specific language governing rights and limitations
10 * under the License.
11 *
12 * The Original Code is AOLserver Code and related documentation
13 * distributed by AOL.
14 *
15 * The Initial Developer of the Original Code is America Online,
16 * Inc. Portions created by AOL are Copyright (C) 1999 America Online,
17 * Inc. All Rights Reserved.
18 *
19 * Alternatively, the contents of this file may be used under the terms
20 * of the GNU General Public License (the "GPL"), in which case the
21 * provisions of GPL are applicable instead of those above. If you wish
22 * to allow use of your version of this file only under the terms of the
23 * GPL and not to allow others to use your version of this file under the
24 * License, indicate your decision by deleting the provisions above and
25 * replace them with the notice and other provisions required by the GPL.
26 * If you do not delete the provisions above, a recipient may use your
27 * version of this file under either the License or the GPL.
28 */
29
30
31/*
32 * url.c --
33 *
34 * Parse URLs.
35 */
36
37#include "nsd.h"
38
39/*
40 * Local typedefs of functions
41 */
42
43/*
44 * Local functions defined in this file
45 */
46
47static char* ParseUpTo(char *chars, char ch)
48 NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1)));
49
50
51/*
52 *----------------------------------------------------------------------
53 *
54 * Ns_RelativeUrl --
55 *
56 * If the url passed in is for this server, then the initial
57 * part of the URL is stripped off. e.g., on a server whose
58 * location is http://www.foo.com, Ns_RelativeUrl of
59 * "http://www.foo.com/hello" will return "/hello".
60 *
61 * Results:
62 * A pointer to the beginning of the relative url in the
63 * passed-in url, or NULL if error.
64 *
65 * Side effects:
66 * Will set errno on error.
67 *
68 *----------------------------------------------------------------------
69 */
70
71const char *
72Ns_RelativeUrl(const char *url, const char *location)
73{
74 const char *v, *result;
75
76 if (url == NULL((void*)0) || location == NULL((void*)0)) {
77 result = NULL((void*)0);
78 } else {
79
80 /*
81 * Ns_Match will return the point in URL where location stops
82 * being equal to it because location ends.
83 *
84 * e.g., if location = "http://www.foo.com" and
85 * url="http://www.foo.com/a/b" then after the call,
86 * v="/a/b", or NULL if there's a mismatch.
87 */
88
89 v = Ns_Match(location, url);
90 if (v != NULL((void*)0)) {
91 url = v;
92 }
93 while (url[0] == '/' && url[1] == '/') {
94 ++url;
95 }
96 result = url;
97 }
98 return result;
99}
100
101
102/*
103 *----------------------------------------------------------------------
104 *
105 * ParseUserInfo --
106 *
107 * Parse the user-info part from the "authority" part of a URL
108 *
109 * authority = [ userinfo "@" ] host [ ":" port ]
110 *
111 * and return the remainder of the string.
112 *
113 * Results:
114 * String starting with the "host" part.
115 *
116 * Side effects:
117 *
118 * In case the "authority" contains "userinfo", it is returned via the
119 * pointer in the second argument.
120 *
121 *----------------------------------------------------------------------
122 */
123
124static char *
125ParseUserInfo(char *chars, char **userinfo)
126{
127 char *p;
128
129 /*
130 * RFC 3986 defines
131 *
132 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
133 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
134 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
135 * / "*" / "+" / "," / ";" / "="
136 *
137 * ALPHA = (%41-%5A and %61-%7A)
138 * DIGIT = (%30-%39),
139 * hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E)
140 * exclam (%21) dollar (%24) amp (%26) singlequote (%27)
141 * lparen (%28) rparen (%29) asterisk (%2A) plus (%2B)
142 * comma (%2C) semicolon (%3B) equals (%3D)
143 *
144 * colon (%3a)
145 *
146 * Percent-encoded is just checked by the character range, but does not
147 * check the two following (number) chars.
148 *
149 * percent (%25) ... for percent-encoded
150 */
151 static const bool_Bool userinfo_table[256] = {
Duplicate code detected
152 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
153 /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
154 /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155 /* 0x20 */ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
156 /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
157 /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
159 /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
161 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163 /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
164 /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165 /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
167 /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
168 /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
169 };
170
171 NS_NONNULL_ASSERT(chars != NULL)((void) (0));
172 NS_NONNULL_ASSERT(userinfo != NULL)((void) (0));
173
174 for (p = chars; userinfo_table[UCHAR(*p)((unsigned char)(*p))] != 0; p++) {
175 ;
176 }
177
178 if (*p == '\x40') {
179 *userinfo = chars;
180 *p = '\0';
181 chars = p+1;
182 } else {
183 *userinfo = NULL((void*)0);
184 }
185 /*fprintf(stderr, "==== userinfo p %.2x, '%s'\n", *p, chars);*/
186
187 return chars;
188}
189
190
191/*
192 *----------------------------------------------------------------------
193 *
194 * ParseUpTo --
195 *
196 * Helper function of Ns_ParseUrl(). Search the string from left to
197 * right for the specified character. When it is found, overwrite it
198 * with a NUL character and return a pointer to the character following
199 * it. If the character does not exist in the string, return NULL.
200 *
201 * Results:
202 * Pointer to the text following the delimiter, or NULL.
203 *
204 * Side effects:
205 * The passed-in string is modified when the character is found.
206 *
207 *----------------------------------------------------------------------
208 */
209
210static char *
211ParseUpTo(char *chars, char ch)
212{
213 char *p = strchr(chars, INTCHAR(ch)((int)((unsigned char)((ch)))));
214
215 if (p != NULL((void*)0)) {
216 *p++ = '\0';
217 }
218 return p;
219}
220
221/*
222 *----------------------------------------------------------------------
223 *
224 * ValidateChars --
225 *
226 * Helper function of Ns_ParseUrl(). Scan a string up to the end based on
227 * the provided table of valid characters.
228 *
229 * Results:
230 *
231 * When the string is valid, it is returned unmodified. In case it contains
232 * errors, NULL is returned and the error message is set.
233 *
234 * Side effects:
235 * None.
236 *
237 *----------------------------------------------------------------------
238 */
239
240static char *
241ValidateChars(char *chars, const bool_Bool *table, const char *msg, const char** errorMsg)
242{
243 char *p, *result;
244
245 for (p = chars; table[UCHAR(*p)((unsigned char)(*p))] != 0; p++) {
246 ;
247 }
248 if (*p == '\0') {
249 result = chars;
250 } else {
251 *errorMsg = msg;
252 result = NULL((void*)0);
253 }
254 return result;
255}
256
257
258/*
259 *----------------------------------------------------------------------
260 *
261 * Ns_ParseUrl --
262 *
263 * Parse a URL into its component parts
264 *
265 * Results:
266 * NS_OK or NS_ERROR
267 *
268 * Side effects:
269 * The protocol, userinfo, host, port, path, "tail" (last path
270 * element), query, and fragment are stored in the passed-in Ns_URL
271 * structure. The passed-in url will be modified.
272 *
273 *----------------------------------------------------------------------
274 */
275Ns_ReturnCode
276Ns_ParseUrl(char *url, bool_Bool strict, Ns_URL *urlPtr, const char **errorMsg)
277{
278 char *end;
279
280 /*
281 * RFC 3986 defines
282 *
283 * foo://example.com:8042/over/there?name=ferret#nose
284 * \_/ \______________/\_________/ \_________/ \__/
285 * | | | | |
286 * scheme authority path query fragment
287 *
288 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
289 * ALPHA = (%41-%5A and %61-%7A)
290 * DIGIT = (%30-%39),
291 * plus (%2B) hyphen (%2D), period (%2E),
292 *
293 * (underscore (%5F) and tilde (%7E) are not accepted by scheme_table)
294 */
295
296 static const bool_Bool scheme_table[256] = {
297 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
298 /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300 /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
301 /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
302 /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
303 /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
304 /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
305 /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
306 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
308 /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
309 /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
310 /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
312 /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313 /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
314 };
315
316 /*
317 * RFC 3986 defines (simplified)
318 *
319 * path = path-abempty ; begins with "/" or is empty
320 * / path-absolute ; begins with "/" but not "//"
321 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
322 * segment = *pchar
323 * segment-nz = 1*pchar
324 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
325 *
326 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
327 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
328 * / "*" / "+" / "," / ";" / "="
329 *
330 * ALPHA = (%41-%5A and %61-%7A)
331 * DIGIT = (%30-%39),
332 * hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E)
333 * exclam (%21) dollar (%24) amp (%26) singlequote (%27)
334 * lparen (%28) rparen (%29) asterisk (%2A) plus (%2B)
335 * comma (%2C) semicolon (%3B) equals (%3D)
336 *
337 * slash (%2F) colon (%3A) at (%40)
338 *
339 * Percent-encoded is just checked by the character range, but does not
340 * check the two following (number) chars.
341 *
342 * percent (%25) ... for percent-encoded
343 */
344
345 static const bool_Bool path_table[256] = {
346 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
347 /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
348 /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 /* 0x20 */ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
351 /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352 /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
353 /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
354 /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
355 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
356 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
357 /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
359 /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
360 /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
361 /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
363 };
364
365 /*
366 * RFC 3986 defines
367 *
368 * query = *( pchar / "/" / "?" )
369 * fragment = *( pchar / "/" / "?" )
370 *
371 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
372 *
373 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
374 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
375 * / "*" / "+" / "," / ";" / "="
376 *
377 * ALPHA = (%41-%5A and %61-%7A)
378 * DIGIT = (%30-%39),
379 * hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E)
380 * exclam (%21) dollar (%24) amp (%26) singlequote (%27)
381 * lparen (%28) rparen (%29) asterisk (%2A) plus (%2B)
382 * comma (%2C) semicolon (%3B) equals (%3D)
383 *
384 * slash (%2F) colon (%3A) question mark (%3F) at (%40)
385 *
386 * Percent-encoded is just checked by the character range, but does not
387 * check the two following (number) chars.
388 *
389 * percent (%25) ... for percent-encoded
390 */
391
392 static const bool_Bool fragment_table[256] = {
393 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
394 /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
395 /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 /* 0x20 */ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
397 /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
398 /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
399 /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
400 /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
401 /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
402 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
404 /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
410 };
411 static const bool_Bool alpha_table[256] = {
Similar code here
412 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
413 /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
414 /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
416 /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
417 /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
418 /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
419 /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
420 /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
421 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
422 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
423 /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
424 /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
425 /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
426 /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
429 };
430
431 NS_NONNULL_ASSERT(urlPtr)((void) (0));
432
433 memset(urlPtr, 0, sizeof(Ns_URL));
434
435 /*
436 * Set variable "end" to the end of the protocol
437 * http://www.foo.com:8000/baz/blah/spoo.html
438 * ^
439 * +--end
440 */
441
442 if (alpha_table[UCHAR(*url)((unsigned char)(*url))]) {
443 for (end = url+1; scheme_table[UCHAR(*end)((unsigned char)(*end))] != 0; end++) {
444 ;
445 }
446 } else {
447 end = url;
448 }
449 if (end != url && *end == ':') {
450 /*
451 * There is a protocol specified. Clear out the colon.
452 * Set urlPtr->protocol to the start of the protocol, and url to
453 * the first character after the colon.
454 *
455 * http\0//www.foo.com:8000/baz/blah/spoo.html
456 * ^ ^ ^
457 * | | +-- url
458 * | +-- end
459 * +-------- protocol
460 */
461
462 *end = '\0';
463 urlPtr->protocol = url;
464 url = end + 1;
465 /*fprintf(stderr, "SCHEME looks ok: %s\n", *pprotocol);*/
466
467 } else if (*end != '/' && *end != '?' && *end != '#' && *end != '\0' ) {
468 /*
469 * We do not have an explicit relative URL starting with a
470 * slash. Accept a relative URL based on a heuristic to avoid getting
471 * every non-accepted scheme here (the remaining URL must not have a
472 * colon before a slash).
473 */
474 char *p;
475
476 for (p = end; *p != '\0' && *p != '/'; p++) {
477 if (*p == ':') {
478 /*
479 * We have a colon before the slash or end, do not accept
480 * this.
481 */
482 Ns_Log(Debug, "URI scheme does not look ok: last char 0x%.2x '%s'",
483 *end, url);
484 *errorMsg = "invalid scheme";
485 return NS_ERROR;
486 }
487 }
488 }
489
490
491 if (url[0] == '/' && url[1] == '/') {
492 bool_Bool hostParsedOk;
493
494 urlPtr->path = (char *)"";
495 urlPtr->tail = (char *)"";
496
497 /*
498 * The URL starts with two slashes, which means an authority part
499 * (host) is specified. Advance url past that and set urlPtr->host.
500 *
501 * http\0//www.foo.com:8000/baz/blah/spoo.html
502 * ^ ^ ^
503 * | | +-- url, *host
504 * | +-- end
505 * +-------- protocol
506 */
507 url = url + 2;
508
509 /*
510 * RFC 3986 defines
511 *
512 * authority = [ userinfo "@" ] host [ ":" port ]
513 *
514 */
515 url = ParseUserInfo(url, &urlPtr->userinfo);
516 urlPtr->host = url;
517
518 /*
519 * Parse authority part and return the optional string pointing to the
520 * port.
521 */
522 hostParsedOk = Ns_HttpParseHost2(url, strict, &urlPtr->host, &urlPtr->port, &end);
523 if (!hostParsedOk) {
524 *errorMsg = "invalid authority";
525 return NS_ERROR;
526 }
527
528 if (urlPtr->port != NULL((void*)0)) {
529
530 /*
531 * A port was specified. Set urlPtr->port to the first
532 * digit.
533 *
534 * http\0//www.foo.com\08000/baz/blah/spoo.html
535 * ^ ^ ^ ^
536 * | +-- host | +------ url, port
537 * +----- protocol +--- end
538 */
539
540 url = urlPtr->port;
541 urlPtr->port = url;
542 }
543 } else {
544 end = url;
545 }
546 /*
547 * "end" points now either to
548 * - the string terminator (NUL)
549 * - the slash which starts the path/tail, or to
550 * - one of the remaining components (query, or fragment)
551 *
552 * http\0//www.foo.com\08000\0baz/blah/spoo.html
553 * ^ ^ ^ ^ ^
554 * | | | | +-- url
555 * | +-- host | +-- end
556 * +----- protocol +-- port
557 */
558 /*fprintf(stderr, "CHECK FOR PATH <%s>\n", end);*/
559
560
561 if (*end == '\0') {
562 /*
563 * No path, tail, query, fragment specified: we are done.
564 */
565
566 } else if (*end == '#') {
567 /*
568 * No path, tail, query, just a fragment specified.
569 * We could validate.
570 */
571 *end = '\0';
572 urlPtr->fragment = end + 1;
573
574 } else if (*end == '?') {
575 /*
576 * No path, tail, just a query and maybe a fragment specified.
577 */
578 *end = '\0';
579 urlPtr->query = end + 1;
580 urlPtr->fragment = ParseUpTo(urlPtr->query, '#');
581
582 } else {
583 if (*end == '/') {
584 urlPtr->path = (char *)"";
585 urlPtr->tail = (char *)"";
586
587 /*
588 * We have a path, tail, and maybe a query or fragment specified.
589 */
590 *end = '\0';
591 url = end + 1;
592 /*
593 * Set the path to URL and advance to the last slash.
594 * Set urlPtr->tail to the character after that, or if there is no
595 * slash, the whole string becomes the tail and the path stays empty.
596 *
597 * http\0//www.foo.com\08000\0baz/blah/spoo.html
598 * ^ ^ ^ ^ ^ ^^
599 * | | | | | |+-- tail
600 * | | | | | +-- end
601 * | | | | +-- path
602 * | +-- host | +-- end
603 * +----- protocol +-- port
604 */
605
606
607 /*
608 * Separate the "tail" from the "path", otherwise the string is
609 * just "tail".
610 */
611 urlPtr->query = ParseUpTo(url, '?');
612 if (urlPtr->query == NULL((void*)0)) {
613 urlPtr->fragment = ParseUpTo(url, '#');
614 }
615
616 end = strrchr(url, INTCHAR('/')((int)((unsigned char)(('/')))));
617 if (end == NULL((void*)0)) {
618 urlPtr->tail = url;
619 } else {
620 *end = '\0';
621 urlPtr->path = url;
622 urlPtr->tail = end + 1;
623 }
624
625 } else {
626 /*
627 * The URL starts with no slash, just set the "tail" and leave
628 * "path" undefined (legacy NaviServer).
629 */
630 urlPtr->tail = end;
631 }
632
633 if (urlPtr->tail != NULL((void*)0)) {
634 if (urlPtr->query == NULL((void*)0)) {
635 urlPtr->query = ParseUpTo(urlPtr->tail, '?');
636 }
637 if (urlPtr->query != NULL((void*)0)) {
638 urlPtr->fragment = ParseUpTo(urlPtr->query, '#');
639 } else if (urlPtr->fragment == NULL((void*)0)) {
640 urlPtr->fragment = ParseUpTo(urlPtr->tail, '#');
641 }
642 }
643 if (strict) {
644 /*
645 * Validate content.
646 */
647 if (urlPtr->query != NULL((void*)0)) {
648 urlPtr->query = ValidateChars(urlPtr->query, fragment_table,
649 "query contains invalid character", errorMsg);
650 }
651 if (urlPtr->fragment != NULL((void*)0)) {
652 urlPtr->fragment = ValidateChars(urlPtr->fragment, fragment_table,
653 "fragment contains invalid character", errorMsg);
654 }
655 if (urlPtr->tail != NULL((void*)0)) {
656 urlPtr->tail = ValidateChars(urlPtr->tail, path_table,
657 "query contains invalid character", errorMsg);
658 }
659 if (urlPtr->path != NULL((void*)0)) {
660 urlPtr->path = ValidateChars(urlPtr->path, path_table,
661 "path contains invalid character", errorMsg);
662 }
663 }
664 }
665
666 return NS_OK;
667}
668
669
670/*
671 *----------------------------------------------------------------------
672 *
673 * Ns_AbsoluteUrl --
674 *
675 * Construct a URL based on baseurl but with as many parts of
676 * the incomplete url as possible.
677 *
678 * Results:
679 * NS_OK or NS_ERROR.
680 *
681 * Side effects:
682 * None.
683 *
684 *----------------------------------------------------------------------
685 */
686
687Ns_ReturnCode
688Ns_AbsoluteUrl(Ns_DStringTcl_DString *dsPtr, const char *url, const char *base)
689{
690 Ns_DStringTcl_DString urlDs, baseDs;
691 Ns_URL u, bu;
692 const char *errorMsg = NULL((void*)0);
693 Ns_ReturnCode status;
694
695 /*
696 * Copy the URLs to allow Ns_ParseUrl to destroy them.
697 */
698
699 Ns_DStringInitTcl_DStringInit(&urlDs);
700 Ns_DStringInitTcl_DStringInit(&baseDs);
701
702 /*
703 * The first part does not have to be a valid URL.
704 */
705 Ns_DStringAppend(&urlDs, url)Tcl_DStringAppend((&urlDs), (url), -1);
706 (void) Ns_ParseUrl(urlDs.string, NS_FALSE0, &u, &errorMsg);
707
708 Ns_DStringAppend(&baseDs, base)Tcl_DStringAppend((&baseDs), (base), -1);
709 status = Ns_ParseUrl(baseDs.string, NS_FALSE0, &bu, &errorMsg);
710
711 if (bu.protocol == NULL((void*)0) || bu.host == NULL((void*)0) || bu.path == NULL((void*)0)) {
712 status = NS_ERROR;
713 goto done;
714 }
715 if (u.protocol == NULL((void*)0)) {
716 u.protocol = bu.protocol;
717 }
718 assert(u.protocol != NULL)((void) (0));
719
720 if (u.host == NULL((void*)0)) {
721 u.host = bu.host;
722 u.port = bu.port;
723 }
724 assert(u.host != NULL)((void) (0));
725
726 if (u.path == NULL((void*)0)) {
727 u.path = bu.path;
728 }
729 assert(u.path != NULL)((void) (0));
730
731 if (strchr(u.host, INTCHAR(':')((int)((unsigned char)((':'))))) == NULL((void*)0)) {
732 /*
733 * When the host contains a colon, we have to use IP literal notation
734 * to avoid ambiguity of colon (part of address or separator for port).
735 */
736 Ns_DStringVarAppend(dsPtr, u.protocol, "://", u.host, (char *)0L);
737 } else {
738 Ns_DStringVarAppend(dsPtr, u.protocol, "://[", u.host, "]", (char *)0L);
739 }
740 if (u.port != NULL((void*)0)) {
741 Ns_DStringVarAppend(dsPtr, ":", u.port, (char *)0L);
742 }
743 if (*u.path == '\0') {
744 Ns_DStringVarAppend(dsPtr, "/", u.tail, (char *)0L);
745 } else {
746 Ns_DStringVarAppend(dsPtr, "/", u.path, "/", u.tail, (char *)0L);
747 }
748done:
749 Ns_DStringFreeTcl_DStringFree(&urlDs);
750 Ns_DStringFreeTcl_DStringFree(&baseDs);
751
752 return status;
753}
754
755
756
757/*
758 *----------------------------------------------------------------------
759 *
760 * NsTclParseUrlObjCmd --
761 *
762 * Implements "ns_parseurl". Offers the functionality of
763 * Ns_ParseUrl on the Tcl layer.
764 *
765 * Results:
766 * Tcl result.
767 *
768 * Side effects:
769 * none
770 *
771 *----------------------------------------------------------------------
772 */
773
774int
775NsTclParseUrlObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv)
776{
777 int result = TCL_OK0, strict = 0;
778 char *urlString;
779 Ns_ObjvSpec opts[] = {
780 {"-strict", Ns_ObjvBool, &strict, INT2PTR(NS_TRUE)((void *)(intptr_t)(1))},
781 {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)}
782 };
783 Ns_ObjvSpec args[] = {
784 {"url", Ns_ObjvString, &urlString, NULL((void*)0)},
785 {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)}
786 };
787
788 if (Ns_ParseObjv(opts, args, interp, 1, objc, objv) != NS_OK) {
789 result = TCL_ERROR1;
790 } else {
791 char *url;
792 Ns_URL u;
793 const char *errorMsg = NULL((void*)0);
794
795 url = ns_strdup(urlString);
796
797 if (Ns_ParseUrl(url, (bool_Bool)strict, &u, &errorMsg) == NS_OK) {
798 Tcl_Obj *resultObj = Tcl_NewListObj(0, NULL((void*)0));
799
800 if (u.protocol != NULL((void*)0)) {
801 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("proto", 5));
802 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.protocol, -1));
803 }
804 if (u.userinfo != NULL((void*)0)) {
805 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("userinfo", 8));
806 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.userinfo, -1));
807 }
808 if (u.host != NULL((void*)0)) {
809 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("host", 4));
810 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.host, -1));
811 }
812 if (u.port != NULL((void*)0)) {
813 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("port", 4));
814 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.port, -1));
815 }
816 if (u.path != NULL((void*)0)) {
817 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("path", 4));
818 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.path, -1));
819 }
820 if (u.tail != NULL((void*)0)) {
821 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("tail", 4));
822 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.tail, -1));
823 }
824 if (u.query != NULL((void*)0)) {
825 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("query", 5));
826 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.query, -1));
827 }
828 if (u.fragment != NULL((void*)0)) {
829 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("fragment", 8));
830 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.fragment, -1));
831 }
832 if (errorMsg != NULL((void*)0)) {
833 Ns_TclPrintfResult(interp, "Could not parse URL \"%s\": %s", urlString, errorMsg);
834 result = TCL_ERROR1;
835 } else {
836 Tcl_SetObjResult(interp, resultObj);
837 }
838
839 } else {
840 Ns_TclPrintfResult(interp, "Could not parse URL \"%s\": %s", urlString, errorMsg);
841 result = TCL_ERROR1;
842 }
843 ns_free(url);
844 }
845 /*Ns_Log(Notice, "===== ns_parseurl '%s' returns result %d", urlString, result);*/
846 return result;
847}
848
849
850/*
851 *----------------------------------------------------------------------
852 *
853 * NsTclParseHostportObjCmd --
854 *
855 * Implements "ns_parsehostport". Offers the functionality of
856 * Ns_HttpParseHost2 on the Tcl layer.
857 *
858 * Results:
859 * Tcl result.
860 *
861 * Side effects:
862 * none
863 *
864 *----------------------------------------------------------------------
865 */
866
867int
868NsTclParseHostportObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv)
869{
870 int result = TCL_OK0, strict = 0;
871 char *hostportString;
872 Ns_ObjvSpec opts[] = {
873 {"-strict", Ns_ObjvBool, &strict, INT2PTR(NS_TRUE)((void *)(intptr_t)(1))},
874 {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)}
875 };
876 Ns_ObjvSpec args[] = {
877 {"hostport", Ns_ObjvString, &hostportString, NULL((void*)0)},
878 {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)}
879 };
880
881 if (Ns_ParseObjv(opts, args, interp, 1, objc, objv) != NS_OK) {
882 result = TCL_ERROR1;
883 } else {
884 char *hostport, *hostStart, *portStart, *end;
885 bool_Bool success;
886
887 hostport = ns_strdup(hostportString);
888 success = Ns_HttpParseHost2(hostport, strict, &hostStart, &portStart, &end);
889 if (success && *hostStart != '\0' && portStart != hostport) {
890 Tcl_Obj *resultObj = Tcl_NewListObj(0, NULL((void*)0));
891
892 if (hostStart != NULL((void*)0)) {
893 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("host", 4));
894 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(hostStart, -1));
895 }
896 if (portStart != NULL((void*)0)) {
897 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("port", 4));
898 Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(portStart, -1));
899 }
900
901 Tcl_SetObjResult(interp, resultObj);
902
903 } else {
904 Ns_TclPrintfResult(interp, "Could not parse host and port \"%s\"", hostportString);
905 result = TCL_ERROR1;
906 }
907 ns_free(hostport);
908 }
909 return result;
910}
911
912
913/*
914 *----------------------------------------------------------------------
915 *
916 * NsTclAbsoluteUrlObjCmd --
917 *
918 * Implements "ns_absoluteurl". Offers the functionality of
919 * Ns_AbsoluteUrl on the Tcl layer.
920 *
921 * Results:
922 * Tcl result.
923 *
924 * Side effects:
925 * none
926 *
927 *----------------------------------------------------------------------
928 */
929int
930NsTclAbsoluteUrlObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv)
931{
932 int result = TCL_OK0;
933 char *urlString, *baseString;
934 Ns_ObjvSpec args[] = {
935 {"partialurl", Ns_ObjvString, &urlString, NULL((void*)0)},
936 {"baseurl", Ns_ObjvString, &baseString, NULL((void*)0)},
937 {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)}
938 };
939
940 if (Ns_ParseObjv(NULL((void*)0), args, interp, 1, objc, objv) != NS_OK) {
941 result = TCL_ERROR1;
942 } else {
943 Tcl_DString ds;
944
945 Tcl_DStringInit(&ds);
946 if (Ns_AbsoluteUrl(&ds, urlString, baseString) == NS_OK) {
947 Tcl_DStringResult(interp, &ds);
948 } else {
949 Ns_TclPrintfResult(interp, "Could not parse base URL into protocol, host and path");
950 Tcl_DStringFree(&ds);
951 result = TCL_ERROR1;
952 }
953 }
954
955 return result;
956}
957
958/*
959 * Local Variables:
960 * mode: c
961 * c-basic-offset: 4
962 * fill-column: 78
963 * indent-tabs-mode: nil
964 * End:
965 */