From 1478a0e88067a8d2b905bb262190643d69eb7df4 Mon Sep 17 00:00:00 2001 From: Nicolas Gelot Date: Sat, 30 Mar 2019 00:05:55 +0100 Subject: [PATCH 1/4] Update CI with docker job --- .gitlab-ci.yml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 47b51fadf..287ad22bd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,12 +1,20 @@ image: $CI_REGISTRY_IMAGE/env:latest stages: + - check - build - test - report - deploy -front-end: +python: + stage: check + before_script: + - ./manage.sh update_dev_packages + script: + - ./manage.sh pep8_check + +build:web: stage: build before_script: - ./manage.sh npm_packages @@ -16,14 +24,18 @@ front-end: - ./manage.sh styles - ./manage.sh grunt_build -coding-rules: +build:docker: stage: build before_script: - - ./manage.sh update_dev_packages + - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY script: - - ./manage.sh pep8_check + - docker build -t $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest} . + - docker push $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest} + only: + - master + - tags -unit-test: +test:unit: stage: test before_script: - ./manage.sh update_dev_packages @@ -34,7 +46,7 @@ unit-test: - coverage expire_in: 1 hour -functional-test: +test:functional: stage: test image: docker:stable services: @@ -57,11 +69,11 @@ coverage: script: - ./manage.sh coverage dependencies: - - unit-test - - functional-test + - test:unit + - test:functional coverage: '/TOTAL.*\s+(\d+%)$/' -deploy-test: +deploy:test: image: docker:stable stage: deploy only: -- GitLab From a9e2a3a01c19dfcb340c211a6590fe4948301803 Mon Sep 17 00:00:00 2001 From: Nicolas Gelot Date: Sat, 30 Mar 2019 00:00:13 +0100 Subject: [PATCH 2/4] Cleanup and update dependencies --- Dockerfile | 5 +---- requirements.txt | 15 ++++++--------- searx/__init__.py | 15 +-------------- searx/webapp.py | 23 +++++------------------ 4 files changed, 13 insertions(+), 45 deletions(-) diff --git a/Dockerfile b/Dockerfile index 70fb0323b..40ac3eb32 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,9 +4,7 @@ RUN apk add \ git \ build-base \ libxml2-dev \ - libxslt-dev \ - libffi-dev \ - openssl-dev + libxslt-dev # Only to use the docker cache and optimize the build time WORKDIR /src @@ -24,7 +22,6 @@ LABEL description="A privacy-respecting, hackable metasearch engine." RUN apk add \ ca-certificates \ libxslt \ - openssl \ && pip install coverage COPY --from=builder /install/ /usr/local/ diff --git a/requirements.txt b/requirements.txt index fc7eac005..ba135f738 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,8 @@ -redis==3.2.1 -certifi==2017.11.5 flask==1.0.2 flask-babel==0.11.2 -lxml==4.2.3 -idna==2.7 -pygments==2.1.3 -pyopenssl==18.0.0 -python-dateutil==2.7.3 -pyyaml==3.13 -requests[socks]==2.19.1 +lxml==4.3.3 +pygments==2.3.1 +python-dateutil==2.8.0 +pyyaml==5.1 +requests[socks]==2.21.0 +redis==3.2.1 diff --git a/searx/__init__.py b/searx/__init__.py index 7324c1e4b..d0bb7126d 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -15,18 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' -import certifi import logging from os import environ from os.path import realpath, dirname, join, abspath, isfile from io import open -from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION -try: - from yaml import load -except ImportError: - from sys import exit, stderr - stderr.write('[E] install pyyaml\n') - exit(2) +from yaml import load searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) @@ -73,12 +66,6 @@ logging.basicConfig(level=getattr(logging, searx_loglevel)) logger = logging.getLogger('searx') logger.debug('read configuration from %s', settings_path) -# Workaround for openssl versions <1.0.2 -# https://github.com/certifi/python-certifi/issues/26 -if OPENSSL_VERSION_INFO[0:3] < (1, 0, 2): - if hasattr(certifi, 'old_where'): - environ['REQUESTS_CA_BUNDLE'] = certifi.old_where() - logger.warning('You are using an old openssl version({0}), please upgrade above 1.0.2!'.format(OPENSSL_VERSION)) logger.info('Initialisation done') diff --git a/searx/webapp.py b/searx/webapp.py index 84f9a504e..d7e773829 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -37,16 +37,11 @@ from searx import logger logger = logger.getChild('webapp') -try: - from pygments import highlight - from pygments.lexers import get_lexer_by_name - from pygments.util import ClassNotFound - from pygments.formatters import HtmlFormatter -except ImportError: - logger.critical("cannot import dependency: pygments") - from sys import exit - - exit(1) +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.util import ClassNotFound +from pygments.formatters import HtmlFormatter + from html import escape from datetime import datetime, timedelta from werkzeug.contrib.fixers import ProxyFix @@ -80,14 +75,6 @@ from searx.url_utils import urlencode, urlparse, urljoin from searx.utils import new_hmac import threading -# check if the pyopenssl package is installed. -# It is needed for SSL connection without trouble, see #298 -try: - import OpenSSL.SSL # NOQA -except ImportError: - logger.critical("The pyopenssl package has to be installed.\n" - "Some HTTPS connections will fail") - from io import StringIO # serve pages with HTTP/1.1 -- GitLab From 6e676cc0d25c38106e0256e406d9b3e9041f6dfd Mon Sep 17 00:00:00 2001 From: Nicolas Gelot Date: Sat, 30 Mar 2019 01:20:07 +0100 Subject: [PATCH 3/4] Introduce flake8 check for main module --- manage.sh | 1 + requirements-dev.txt | 3 ++- searx/engines/arxiv.py | 2 +- searx/engines/pubmed.py | 2 +- searx/engines/wikidata.py | 2 +- searx/query.py | 30 ++++++++++++++-------------- searx/utils.py | 11 ++++++----- searx/webapp.py | 41 +++++++++++++++++---------------------- 8 files changed, 45 insertions(+), 47 deletions(-) diff --git a/manage.sh b/manage.sh index 47da7b36e..1d21b5886 100755 --- a/manage.sh +++ b/manage.sh @@ -38,6 +38,7 @@ pep8_check() { # W503 line break before binary operator # E722 do not use bare 'except' pycodestyle --exclude=searx/static --max-line-length=120 --ignore "E402,W503,E722" "$SEARX_DIR" "$BASE_DIR/tests" + flake8 --ignore=E722 $SEARX_DIR/*.py } unit_tests() { diff --git a/requirements-dev.txt b/requirements-dev.txt index 1de575509..695f9e183 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,7 @@ babel==2.3.4 mock==2.0.0 -pycodestyle==2.4.0 +pycodestyle==2.5.0 +flake8==3.7.7 mockredispy==2.9.3 pytest==4.1.0 pytest-cov==2.6.1 diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 5ef84f0c1..84e844fbb 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -61,7 +61,7 @@ def response(resp): content = content_string.format(doi_content="", abstract_content=abstract) if len(content) > 300: - content = content[0:300] + "..." + content = content[0:300] + "..." # TODO: center snippet on query term publishedDate = datetime.strptime(entry.xpath('.//published')[0].text, '%Y-%m-%dT%H:%M:%SZ') diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 055f09226..82a37a564 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -81,7 +81,7 @@ def response(resp): pass if len(content) > 300: - content = content[0:300] + "..." + content = content[0:300] + "..." # TODO: center snippet on query term res_dict = {'url': url, diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 047f1ba77..81cf262c5 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -374,7 +374,7 @@ def add_url(urls, result, property_id=None, default_label=None, url_prefix=None, # wiki links don't have property in wikidata page if link_type and 'wiki' in link_type: - links.append(get_wikilink(result, link_type)) + links.append(get_wikilink(result, link_type)) else: dom_element = result.xpath(property_xpath.replace('{propertyid}', property_id)) if dom_element: diff --git a/searx/query.py b/searx/query.py index 89fe0607b..1dde7f329 100644 --- a/searx/query.py +++ b/searx/query.py @@ -22,7 +22,6 @@ from searx.engines import ( categories, engines, engine_shortcuts ) import re -import sys VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') @@ -78,20 +77,21 @@ class RawTextQuery(object): # if correct language-code is found # set it as new search-language - if (lang == lang_id - or lang == lang_name - or lang == english_name - or lang.replace('-', ' ') == country)\ - and lang not in self.languages: - parse_next = True - lang_parts = lang_id.split('-') - if len(lang_parts) == 2: - self.languages.append(lang_parts[0] + '-' + lang_parts[1].upper()) - else: - self.languages.append(lang_id) - # to ensure best match (first match is not necessarily the best one) - if lang == lang_id: - break + if ( + lang in [lang_id, lang_name, english_name] + or lang.replace("-", " ") == country # noqa + ) and lang not in self.languages: + parse_next = True + lang_parts = lang_id.split("-") + if len(lang_parts) == 2: + self.languages.append( + lang_parts[0] + "-" + lang_parts[1].upper() + ) + else: + self.languages.append(lang_id) + # to ensure best match (first match is not necessarily the best one) + if lang == lang_id: + break # user may set a valid, yet not selectable language if VALID_LANGUAGE_CODE.match(lang): diff --git a/searx/utils.py b/searx/utils.py index d1a28ea24..a0df43a7f 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -11,7 +11,7 @@ from codecs import getincrementalencoder from imp import load_source from numbers import Number from os.path import splitext, join -from io import open +from pathlib import Path from random import choice import sys import json @@ -19,7 +19,6 @@ import json from searx import settings from searx.version import VERSION_STRING from searx.languages import language_codes -from searx import settings from searx import logger from io import StringIO @@ -31,8 +30,10 @@ logger = logger.getChild('utils') blocked_tags = ('script', 'style') -useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) - + "/data/useragents.json", 'r', encoding='utf-8').read()) +useragents = json.load(open( + Path(__file__).parent / "data" / "useragents.json", + encoding='utf-8') +) def searx_useragent(): @@ -161,7 +162,7 @@ def get_resources_directory(searx_directory, subdirectory, resources_directory): if not resources_directory: resources_directory = os.path.join(searx_directory, subdirectory) if not os.path.isdir(resources_directory): - raise Exception(directory + " is not a directory") + raise Exception(resources_directory + " is not a directory") return resources_directory diff --git a/searx/webapp.py b/searx/webapp.py index d7e773829..8d837a870 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,17 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' -if __name__ == '__main__': - from sys import path - from os.path import realpath, dirname - - path.append(realpath(dirname(realpath(__file__)) + '/../')) - import hashlib import hmac import json import os -import sys import time import copy @@ -35,7 +28,6 @@ import requests from searx import logger -logger = logger.getChild('webapp') from pygments import highlight from pygments.lexers import get_lexer_by_name @@ -75,11 +67,11 @@ from searx.url_utils import urlencode, urlparse, urljoin from searx.utils import new_hmac import threading -from io import StringIO - # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler +logger = logger.getChild('webapp') + WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) # about static @@ -306,10 +298,11 @@ def render(template_name, override_theme=None, **kwargs): if 'categories' not in kwargs: kwargs['categories'] = ['general'] - kwargs['categories'].extend(x for x in - sorted(categories.keys()) - if x != 'general' - and x in enabled_categories) + kwargs["categories"].extend( + x + for x in sorted(categories.keys()) + if x != "general" and x in enabled_categories + ) if 'all_categories' not in kwargs: kwargs['all_categories'] = ['general'] @@ -415,7 +408,7 @@ def pre_request(): else: try: preferences.parse_dict(request.form) - except Exception as e: + except Exception: logger.exception('invalid settings') request.errors.append(gettext('Invalid settings')) @@ -431,8 +424,9 @@ def pre_request(): allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: - if ((plugin.default_on and plugin.id not in disabled_plugins) - or plugin.id in allowed_plugins): + if ( + plugin.default_on and plugin.id not in disabled_plugins + ) or plugin.id in allowed_plugins: request.user_plugins.append(plugin) @@ -631,7 +625,6 @@ def preferences(): # render preferences image_proxy = request.preferences.get_value('image_proxy') - lang = request.preferences.get_value('language') disabled_engines = request.preferences.engines.get_disabled() allowed_plugins = request.preferences.plugins.get_enabled() @@ -677,11 +670,13 @@ def preferences(): def _is_selected_language_supported(engine, preferences): - language = preferences.get_value('language') - return (language == 'all' - or match_language(language, - getattr(engine, 'supported_languages', []), - getattr(engine, 'language_aliases', {}), None)) + language = preferences.get_value("language") + return language == "all" or match_language( + language, + getattr(engine, "supported_languages", []), + getattr(engine, "language_aliases", {}), + None, + ) @app.route('/image_proxy', methods=['GET']) -- GitLab From 2cc736bdea21aab0c7b1680622df9c1496526411 Mon Sep 17 00:00:00 2001 From: Nicolas Gelot Date: Sat, 30 Mar 2019 09:14:08 +0100 Subject: [PATCH 4/4] Fix python -X dev warnings --- searx/__init__.py | 10 +++++++--- searx/engines/__init__.py | 10 +++++----- searx/engines/currency_convert.py | 11 +++++------ searx/engines/json_engine.py | 2 +- searx/utils.py | 10 ++++------ searx/webapp.py | 2 +- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/searx/__init__.py b/searx/__init__.py index d0bb7126d..5bddf9218 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -18,8 +18,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. import logging from os import environ from os.path import realpath, dirname, join, abspath, isfile -from io import open -from yaml import load + +import yaml searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) @@ -46,7 +46,11 @@ if not settings_path: # load settings with open(settings_path, 'r', encoding='utf-8') as settings_yaml: - settings = load(settings_yaml) + # XXX: docker-compose does not support yet yaml >= 5 + if int(yaml.__version__.split('.')[0]) >= 5: + settings = yaml.load(settings_yaml, Loader=yaml.FullLoader) + else: + settings = yaml.load(settings_yaml) ''' enable debug if diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index f32b57202..953fa8bf0 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -18,12 +18,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. import sys import threading -from os.path import realpath, dirname -from io import open +import json +from pathlib import Path from babel.localedata import locale_identifiers from flask_babel import gettext from operator import itemgetter -from json import loads from requests import get from searx import settings from searx import logger @@ -32,13 +31,14 @@ from searx.utils import load_module, match_language logger = logger.getChild('engines') -engine_dir = dirname(realpath(__file__)) +engine_dir = Path(__file__).parent engines = {} categories = {'general': []} -languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read()) +with open(engine_dir.parent / "data" / "engines_languages.json", encoding='utf-8') as fd: + languages = json.load(fd) babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 9424d7d5e..2a2a3c35d 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,10 +1,9 @@ import json import re -import os import sys import unicodedata -from io import open +from pathlib import Path from datetime import datetime @@ -88,10 +87,10 @@ def response(resp): def load(): global db - current_dir = os.path.dirname(os.path.realpath(__file__)) - json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read() - - db = json.loads(json_data) + with open( + Path(__file__).parent.parent / "data" / "currencies.json", encoding='utf-8' + ) as fd: + db = json.load(fd) load() diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 550947ab6..7a48bc8ce 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,4 +1,4 @@ -from collections import Iterable +from collections.abc import Iterable from json import loads from searx.url_utils import urlencode from searx.utils import to_string diff --git a/searx/utils.py b/searx/utils.py index a0df43a7f..a48dc36bf 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -3,12 +3,12 @@ import hashlib import hmac import os import re +from importlib.machinery import SourceFileLoader from babel.core import get_global from babel.dates import format_date from babel import UnknownLocaleError from codecs import getincrementalencoder -from imp import load_source from numbers import Number from os.path import splitext, join from pathlib import Path @@ -30,10 +30,8 @@ logger = logger.getChild('utils') blocked_tags = ('script', 'style') -useragents = json.load(open( - Path(__file__).parent / "data" / "useragents.json", - encoding='utf-8') -) +with open(Path(__file__).parent / "data" / "useragents.json", encoding='utf-8') as fd: + useragents = json.load(fd) def searx_useragent(): @@ -357,7 +355,7 @@ def load_module(filename, module_dir): if modname in sys.modules: del sys.modules[modname] filepath = join(module_dir, filename) - module = load_source(modname, filepath) + module = SourceFileLoader(modname, filepath).load_module() module.name = modname return module diff --git a/searx/webapp.py b/searx/webapp.py index 8d837a870..22f8eb143 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -36,7 +36,7 @@ from pygments.formatters import HtmlFormatter from html import escape from datetime import datetime, timedelta -from werkzeug.contrib.fixers import ProxyFix +from werkzeug.middleware.proxy_fix import ProxyFix from flask import ( Flask, request, render_template, url_for, Response, make_response, redirect, send_from_directory -- GitLab