From: Frank Ch. Eigler Date: Wed, 25 Mar 2020 03:46:30 +0000 (-0400) Subject: debuginfod: User-Agent and X-Forwarded-For header relay X-Git-Tag: elfutils-0.179~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dbfaf36ac283006621cce0d343462512c8c5635d;p=thirdparty%2Felfutils.git debuginfod: User-Agent and X-Forwarded-For header relay Extend the debuginfod client API with a function to stuff outgoing headers into libcurl http transfers. Use this from debuginfod so federated trees of debuginfod/httpd can trace back to to the originating client for administrative purposes. Docs & test included. Signed-off-by: Frank Ch. Eigler --- diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 989a90295..3329be351 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,15 @@ +2020-03-24 Frank Ch. Eigler + + * debuginfod.h, libdebuginfod.map: New functions for _add_url_header. + * debuginfod-client.c (struct debuginfod_client): Add headers fields. + (debuginfod_add_http_header): New client api to add outgoing headers. + (add_default_headers): Renamed from add_extra_headers, skip if flag. + (debuginfod_query_server): Pass accumulated headers to libcurl. + (debuginfod_end): Clean accumulated headers. + (debuginfod_find_*): Add default headers at this point. + * debuginfod.cxx (handle_buildid): Add conn pointer. Use it to relay + incoming UA and XFF headers to federated upstream debuginfods. + 2020-03-26 Frank Ch. Eigler * debuginfod.cxx (handler_cb): Export two families of metrics for diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index b0d5cb028..043e8aa24 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -85,6 +85,10 @@ struct debuginfod_client /* Stores current/last url, if any. */ char* url; + /* Accumulates outgoing http header names/values. */ + int user_agent_set_p; /* affects add_default_headers */ + struct curl_slist *headers; + /* Can contain all other context, like cache_path, server_urls, timeout or other info gotten from environment variables, the handle data, etc. So those don't have to be reparsed and @@ -311,8 +315,11 @@ debuginfod_clean_cache(debuginfod_client *c, static void -add_extra_headers(CURL *handle) +add_default_headers(debuginfod_client *client) { + if (client->user_agent_set_p) + return; + /* Compute a User-Agent: string to send. The more accurately this describes this host, the likelier that the debuginfod servers might be able to locate debuginfo for us. */ @@ -372,7 +379,7 @@ add_extra_headers(CURL *handle) } char *ua = NULL; - rc = asprintf(& ua, "%s/%s,%s,%s/%s", + rc = asprintf(& ua, "User-Agent: %s/%s,%s,%s/%s", PACKAGE_NAME, PACKAGE_VERSION, utspart ?: "", id ?: "", @@ -381,7 +388,7 @@ add_extra_headers(CURL *handle) ua = NULL; if (ua) - curl_easy_setopt(handle, CURLOPT_USERAGENT, (void*) ua); /* implicit strdup */ + (void) debuginfod_add_http_header (client, ua); free (ua); free (id); @@ -465,6 +472,8 @@ debuginfod_query_server (debuginfod_client *c, free (c->url); c->url = NULL; + add_default_headers(c); + /* Is there any server we can query? If not, don't do any work, just return with ENOSYS. Don't even access the cache. */ urls_envvar = getenv(server_urls_envvar); @@ -724,7 +733,7 @@ debuginfod_query_server (debuginfod_client *c, #endif curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); - add_extra_headers(data[i].handle); + curl_easy_setopt(data[i].handle, CURLOPT_HTTPHEADER, c->headers); curl_multi_add_handle(curlm, data[i].handle); server_url = strtok_r(NULL, url_delim, &strtok_saveptr); @@ -977,6 +986,10 @@ debuginfod_get_url(debuginfod_client *client) void debuginfod_end (debuginfod_client *client) { + if (client == NULL) + return; + + curl_slist_free_all (client->headers); free (client->url); free (client); } @@ -1011,6 +1024,23 @@ int debuginfod_find_source(debuginfod_client *client, } +/* Add an outgoing HTTP header. */ +int debuginfod_add_http_header (debuginfod_client *client, const char* header) +{ + struct curl_slist *temp = curl_slist_append (client->headers, header); + if (temp == NULL) + return -ENOMEM; + + /* Track if User-Agent: is being set. If so, signal not to add the + default one. */ + if (strncmp (header, "User-Agent:", 11) == 0) + client->user_agent_set_p = 1; + + client->headers = temp; + return 0; +} + + void debuginfod_set_progressfn(debuginfod_client *client, debuginfod_progressfn_t fn) diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 98491c2c6..74eb44372 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -1433,11 +1433,12 @@ debuginfod_find_progress (debuginfod_client *, long a, long b) } -static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */, - const string& artifacttype /* unsafe */, - const string& suffix /* unsafe */, - int *result_fd - ) +static struct MHD_Response* +handle_buildid (MHD_Connection* conn, + const string& buildid /* unsafe */, + const string& artifacttype /* unsafe */, + const string& suffix /* unsafe */, + int *result_fd) { // validate artifacttype string atype_code; @@ -1526,6 +1527,35 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */, { debuginfod_set_progressfn (client, & debuginfod_find_progress); + if (conn) + { + // Transcribe incoming User-Agent: + string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; + string ua_complete = string("User-Agent: ") + ua; + debuginfod_add_http_header (client, ua_complete.c_str()); + + // Compute larger XFF:, for avoiding info loss during + // federation, and for future cyclicity detection. + string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; + if (xff != "") + xff += string(", "); // comma separated list + + // Compute the client's numeric IP address only - so can't merge with conninfo() + const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + struct sockaddr *so = u ? u->client_addr : 0; + char hostname[256] = ""; // RFC1035 + if (so && so->sa_family == AF_INET) + (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + else if (so && so->sa_family == AF_INET6) + (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + + string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); + debuginfod_add_http_header (client, xff_complete.c_str()); + } + if (artifacttype == "debuginfo") fd = debuginfod_find_debuginfo (client, (const unsigned char*) buildid.c_str(), @@ -1735,10 +1765,9 @@ handler_cb (void * /*cls*/, } inc_metric("http_requests_total", "type", artifacttype); - // get the resulting fd so we can report its size int fd; - r = handle_buildid(buildid, artifacttype, suffix, &fd); + r = handle_buildid(connection, buildid, artifacttype, suffix, &fd); if (r) { struct stat fs; @@ -1837,7 +1866,7 @@ dwarf_extract_source_paths (Elf *elf, set& debug_sourcefiles) struct MHD_Response *r = 0; try { - r = handle_buildid (buildid, "debuginfo", "", &alt_fd); + r = handle_buildid (0, buildid, "debuginfo", "", &alt_fd); } catch (const reportable_exception& e) { diff --git a/debuginfod/debuginfod.h b/debuginfod/debuginfod.h index 2ace5f350..8d90838b4 100644 --- a/debuginfod/debuginfod.h +++ b/debuginfod/debuginfod.h @@ -84,6 +84,9 @@ void* debuginfod_get_user_data (debuginfod_client *client); /* Get the current or last active URL, if known. */ const char* debuginfod_get_url (debuginfod_client *client); +/* Add an outgoing HTTP request "Header: Value". Copies string. */ +int debuginfod_add_http_header (debuginfod_client *client, const char* header); + /* Release debuginfod client connection context handle. */ void debuginfod_end (debuginfod_client *client); diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map index d84e8924f..b8edfb016 100644 --- a/debuginfod/libdebuginfod.map +++ b/debuginfod/libdebuginfod.map @@ -13,4 +13,5 @@ ELFUTILS_0.179 { debuginfod_set_user_data; debuginfod_get_user_data; debuginfod_get_url; + debuginfod_add_http_header; }; diff --git a/doc/ChangeLog b/doc/ChangeLog index cfb03b380..38ce441c4 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,9 @@ +2020-03-24 Frank Ch. Eigler + + * debuginfod_add_http_header.3: New function, documented ... + * debuginfod_find_debuginfo.3: ... here. + * Makefile.am (notrans_dist_*_man3): Add it. + 2020-03-26 Frank Ch. Eigler * debuginfod.8 (-R): Note zstd compression complications diff --git a/doc/Makefile.am b/doc/Makefile.am index 38b8226d7..f0c7e55df 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -24,6 +24,7 @@ notrans_dist_man1_MANS= if DEBUGINFOD notrans_dist_man8_MANS += debuginfod.8 +notrans_dist_man3_MANS += debuginfod_add_http_header.3 notrans_dist_man3_MANS += debuginfod_begin.3 notrans_dist_man3_MANS += debuginfod_end.3 notrans_dist_man3_MANS += debuginfod_find_debuginfo.3 diff --git a/doc/debuginfod_add_http_header.3 b/doc/debuginfod_add_http_header.3 new file mode 100644 index 000000000..16279936e --- /dev/null +++ b/doc/debuginfod_add_http_header.3 @@ -0,0 +1 @@ +.so man3/debuginfod_find_debuginfo.3 diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3 index 42f2265b2..1c7c49918 100644 --- a/doc/debuginfod_find_debuginfo.3 +++ b/doc/debuginfod_find_debuginfo.3 @@ -54,6 +54,8 @@ OPTIONAL FUNCTIONS .BI " void *" data ");" .BI "void* debuginfod_get_user_data(debuginfod_client *" client ");" .BI "const char* debuginfod_get_url(debuginfod_client *" client ");" +.BI "int debuginfod_add_http_header(debuginfod_client *" client "," +.BI " const char* " header ");" .SH DESCRIPTION @@ -160,6 +162,21 @@ The resulting string is owned by the library, and must not be modified or freed. The caller should copy it if it is needed beyond the release of the client object. +.SS "HTTP HEADER" + +Before a lookup function is initiated, a client application may +add HTTP request headers to future downloads. +.BR \%debuginfod_add_http_header () +may be called with strings of the form +.BR \%"Header:\~value" . +These strings are copied by the library. A zero return value +indicates success, but out-of-memory conditions may result in +a non-zero \fI-ENOMEM\fP. + +By default, the library adds a descriptive \fIUser-Agent:\fP +header to outgoing requests. If the client application adds +a header with the same name, this default is suppressed. + .SH "CACHE" If the query is successful, the \fBdebuginfod_find_*\fP() functions save the target file to a local cache. The location of the cache is controlled diff --git a/tests/ChangeLog b/tests/ChangeLog index ba88ecf6a..d1c9e9528 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +2020-03-24 Frank Ch. Eigler + + * run-debuginfod-find.sh: Test relay of UA and XFF headers across + federating debuginfods. + 2020-03-26 Frank Ch. Eigler * run-debuginfod-find.sh: Look for debuginfod's new diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index db36420f7..799b7005a 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -95,8 +95,9 @@ wait_ready() fi } -env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat R F Z L & +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 & PID1=$! +tempfiles vlog4 # Server must become ready wait_ready $PORT1 'ready' 1 export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/ # or without trailing / @@ -388,6 +389,14 @@ fi rm -rf $DEBUGINFOD_CACHE_PATH testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID +# send a request to stress XFF and User-Agent federation relay; +# we'll grep for the two patterns in vlog4 +curl -s -H 'User-Agent: TESTCURL' -H 'X-Forwarded-For: TESTXFF' $DEBUGINFOD_URLS/buildid/deaddeadbeef00000000/debuginfo -o /dev/null || true + +grep UA:TESTCURL vlog4 +grep XFF:TESTXFF vlog4 + + # confirm that first server can't resolve symlinked info in L/ but second can BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ -a L/foo | grep 'Build ID' | cut -d ' ' -f 7`