From 48f9d957b64a81659dd470cc9ea4ea373e236389 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Thu, 20 Jan 2022 12:09:08 -0300 Subject: [PATCH 1/4] Hide full url path when 404 error is thrown --- searx/network/raise_for_httperror.py | 5 ++++- searx/search/processors/online.py | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index bd12df9a9..ef60d405c 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -2,7 +2,7 @@ """ Raise exception for an HTTP response is an error. """ -from searx.exceptions import (SearxEngineCaptchaException, SearxEngineTooManyRequestsException, +from searx.exceptions import (SearxEngineAPIException, SearxEngineCaptchaException, SearxEngineTooManyRequestsException, SearxEngineAccessDeniedException) @@ -63,4 +63,7 @@ def raise_for_httperror(resp): suspended_time=3600 * 24) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() + if resp.status_code == 404: + message = f'(404) Resource not found for "{resp.url.host}". Are you sure the API endpoint is valid?' + raise SearxEngineAPIException(message) resp.raise_for_status() diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 66719ea9b..f7cdf45a8 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -15,6 +15,7 @@ from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptc from searx.metrology.error_recorder import record_exception, record_error from searx.search.processors.abstract import EngineProcessor +from urllib.parse import urlparse logger = logger.getChild('search.processor.online') @@ -175,9 +176,11 @@ class OnlineProcessor(EngineProcessor): elif (issubclass(e.__class__, (httpx.HTTPError, httpx.StreamError))): result_container.add_unresponsive_engine(self.engine_name, 'HTTP error') # other requests exception + response_url = urlparse(str(e.response.url)) logger.exception("engine {0} : requests exception" - "(search duration : {1} s, timeout: {2} s) : {3}" - .format(self.engine_name, engine_time, timeout_limit, e)) + "(search duration : {1} s, timeout: {2} s) : " + "Status code {3} while requesting {4}" + .format(self.engine_name, engine_time, timeout_limit, e.response.status_code, response_url.hostname)) http_exception = True elif (issubclass(e.__class__, SearxEngineCaptchaException)): result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required') -- GitLab From 140378ea33184fc16fd3fd97cc3cc585cab5c5b0 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Thu, 20 Jan 2022 12:19:42 -0300 Subject: [PATCH 2/4] Fix line too long --- searx/search/processors/online.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index f7cdf45a8..f52084d45 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -177,10 +177,12 @@ class OnlineProcessor(EngineProcessor): result_container.add_unresponsive_engine(self.engine_name, 'HTTP error') # other requests exception response_url = urlparse(str(e.response.url)) + status_code = e.response.status_code + hostname = response_url.hostname logger.exception("engine {0} : requests exception" "(search duration : {1} s, timeout: {2} s) : " "Status code {3} while requesting {4}" - .format(self.engine_name, engine_time, timeout_limit, e.response.status_code, response_url.hostname)) + .format(self.engine_name, engine_time, timeout_limit, status_code, hostname)) http_exception = True elif (issubclass(e.__class__, SearxEngineCaptchaException)): result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required') -- GitLab From eff8e70c934e3683dbc5423e54445ca0f48efdf0 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Wed, 26 Jan 2022 16:20:42 -0300 Subject: [PATCH 3/4] Disabling nginx logs --- etc/nginx/conf.d/spot.conf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etc/nginx/conf.d/spot.conf b/etc/nginx/conf.d/spot.conf index a95b5aa6b..87e19f0f8 100644 --- a/etc/nginx/conf.d/spot.conf +++ b/etc/nginx/conf.d/spot.conf @@ -20,6 +20,9 @@ server { root /var/www/spot; + access_log /dev/null; + error_log /dev/null; + location = / { limit_req zone=botlimit burst=10; include /etc/nginx/proxy_spot_params; -- GitLab From cb60689de25de1cdaaf2044319d5ac4ec6c7d26c Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Thu, 27 Jan 2022 09:50:48 -0300 Subject: [PATCH 4/4] Revert "Disabling nginx logs" This reverts commit eff8e70c934e3683dbc5423e54445ca0f48efdf0. --- etc/nginx/conf.d/spot.conf | 3 --- 1 file changed, 3 deletions(-) diff --git a/etc/nginx/conf.d/spot.conf b/etc/nginx/conf.d/spot.conf index 87e19f0f8..a95b5aa6b 100644 --- a/etc/nginx/conf.d/spot.conf +++ b/etc/nginx/conf.d/spot.conf @@ -20,9 +20,6 @@ server { root /var/www/spot; - access_log /dev/null; - error_log /dev/null; - location = / { limit_req zone=botlimit burst=10; include /etc/nginx/proxy_spot_params; -- GitLab