diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 47b51fadf3f5944a6669f808ccec0a3dff4977a1..287ad22bd2f3ad5291ecaab7746d5d84193adbd9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,12 +1,20 @@ image: $CI_REGISTRY_IMAGE/env:latest stages: + - check - build - test - report - deploy -front-end: +python: + stage: check + before_script: + - ./manage.sh update_dev_packages + script: + - ./manage.sh pep8_check + +build:web: stage: build before_script: - ./manage.sh npm_packages @@ -16,14 +24,18 @@ front-end: - ./manage.sh styles - ./manage.sh grunt_build -coding-rules: +build:docker: stage: build before_script: - - ./manage.sh update_dev_packages + - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY script: - - ./manage.sh pep8_check + - docker build -t $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest} . + - docker push $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest} + only: + - master + - tags -unit-test: +test:unit: stage: test before_script: - ./manage.sh update_dev_packages @@ -34,7 +46,7 @@ unit-test: - coverage expire_in: 1 hour -functional-test: +test:functional: stage: test image: docker:stable services: @@ -57,11 +69,11 @@ coverage: script: - ./manage.sh coverage dependencies: - - unit-test - - functional-test + - test:unit + - test:functional coverage: '/TOTAL.*\s+(\d+%)$/' -deploy-test: +deploy:test: image: docker:stable stage: deploy only: diff --git a/Dockerfile b/Dockerfile index 70fb0323bb14b67f157d02f6017bc08a6cefaf23..40ac3eb320098673275e7ee864731e7f8cd8090d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,9 +4,7 @@ RUN apk add \ git \ build-base \ libxml2-dev \ - libxslt-dev \ - libffi-dev \ - openssl-dev + libxslt-dev # Only to use the docker cache and optimize the build time WORKDIR /src @@ -24,7 +22,6 @@ LABEL description="A privacy-respecting, hackable metasearch engine." RUN apk add \ ca-certificates \ libxslt \ - openssl \ && pip install coverage COPY --from=builder /install/ /usr/local/ diff --git a/manage.sh b/manage.sh index 47da7b36e7b850dba5c655678340c74d61f5b353..1d21b5886557e9c0a316fc109901662978091d63 100755 --- a/manage.sh +++ b/manage.sh @@ -38,6 +38,7 @@ pep8_check() { # W503 line break before binary operator # E722 do not use bare 'except' pycodestyle --exclude=searx/static --max-line-length=120 --ignore "E402,W503,E722" "$SEARX_DIR" "$BASE_DIR/tests" + flake8 --ignore=E722 $SEARX_DIR/*.py } unit_tests() { diff --git a/requirements-dev.txt b/requirements-dev.txt index 1de575509df51a7ab65090584ed94374bb422a9a..695f9e18333ea12f236aba72309063e2f51b121b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,7 @@ babel==2.3.4 mock==2.0.0 -pycodestyle==2.4.0 +pycodestyle==2.5.0 +flake8==3.7.7 mockredispy==2.9.3 pytest==4.1.0 pytest-cov==2.6.1 diff --git a/requirements.txt b/requirements.txt index fc7eac0050a89571a9fbc3abeba030fe65e126f8..ba135f738b3a6463b9b89de931c79b9b346bf15d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,8 @@ -redis==3.2.1 -certifi==2017.11.5 flask==1.0.2 flask-babel==0.11.2 -lxml==4.2.3 -idna==2.7 -pygments==2.1.3 -pyopenssl==18.0.0 -python-dateutil==2.7.3 -pyyaml==3.13 -requests[socks]==2.19.1 +lxml==4.3.3 +pygments==2.3.1 +python-dateutil==2.8.0 +pyyaml==5.1 +requests[socks]==2.21.0 +redis==3.2.1 diff --git a/searx/__init__.py b/searx/__init__.py index 7324c1e4b1eded5819d14659dc8f9a39bbc724b2..5bddf92189ecd0f1b7a2f70941ddda54408762b9 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -15,18 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' -import certifi import logging from os import environ from os.path import realpath, dirname, join, abspath, isfile -from io import open -from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION -try: - from yaml import load -except ImportError: - from sys import exit, stderr - stderr.write('[E] install pyyaml\n') - exit(2) + +import yaml searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) @@ -53,7 +46,11 @@ if not settings_path: # load settings with open(settings_path, 'r', encoding='utf-8') as settings_yaml: - settings = load(settings_yaml) + # XXX: docker-compose does not support yet yaml >= 5 + if int(yaml.__version__.split('.')[0]) >= 5: + settings = yaml.load(settings_yaml, Loader=yaml.FullLoader) + else: + settings = yaml.load(settings_yaml) ''' enable debug if @@ -73,12 +70,6 @@ logging.basicConfig(level=getattr(logging, searx_loglevel)) logger = logging.getLogger('searx') logger.debug('read configuration from %s', settings_path) -# Workaround for openssl versions <1.0.2 -# https://github.com/certifi/python-certifi/issues/26 -if OPENSSL_VERSION_INFO[0:3] < (1, 0, 2): - if hasattr(certifi, 'old_where'): - environ['REQUESTS_CA_BUNDLE'] = certifi.old_where() - logger.warning('You are using an old openssl version({0}), please upgrade above 1.0.2!'.format(OPENSSL_VERSION)) logger.info('Initialisation done') diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index f32b57202352f3ac9d3ac5df25080768a2c03452..953fa8bf0360fa19fc07ba459751d4663e707b39 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -18,12 +18,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. import sys import threading -from os.path import realpath, dirname -from io import open +import json +from pathlib import Path from babel.localedata import locale_identifiers from flask_babel import gettext from operator import itemgetter -from json import loads from requests import get from searx import settings from searx import logger @@ -32,13 +31,14 @@ from searx.utils import load_module, match_language logger = logger.getChild('engines') -engine_dir = dirname(realpath(__file__)) +engine_dir = Path(__file__).parent engines = {} categories = {'general': []} -languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read()) +with open(engine_dir.parent / "data" / "engines_languages.json", encoding='utf-8') as fd: + languages = json.load(fd) babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 5ef84f0c1df2e26146d42855054024714fae83a7..84e844fbbcd5e8fd5a277a9a5185ceb17444bd89 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -61,7 +61,7 @@ def response(resp): content = content_string.format(doi_content="", abstract_content=abstract) if len(content) > 300: - content = content[0:300] + "..." + content = content[0:300] + "..." # TODO: center snippet on query term publishedDate = datetime.strptime(entry.xpath('.//published')[0].text, '%Y-%m-%dT%H:%M:%SZ') diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 9424d7d5ece8ad60b6f262baf6b0485ce521a00a..2a2a3c35d80d60b8a9c28ab56a7f50529d4096c2 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,10 +1,9 @@ import json import re -import os import sys import unicodedata -from io import open +from pathlib import Path from datetime import datetime @@ -88,10 +87,10 @@ def response(resp): def load(): global db - current_dir = os.path.dirname(os.path.realpath(__file__)) - json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read() - - db = json.loads(json_data) + with open( + Path(__file__).parent.parent / "data" / "currencies.json", encoding='utf-8' + ) as fd: + db = json.load(fd) load() diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 550947ab6818587e15b77259f06c348c3f2baeca..7a48bc8cee536229822d9fc1ecf77a75912da2e2 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,4 +1,4 @@ -from collections import Iterable +from collections.abc import Iterable from json import loads from searx.url_utils import urlencode from searx.utils import to_string diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 055f092269d64adcfb5d3f004753bccbbaf8c176..82a37a564e7efca26fa1c50066215f467e40f506 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -81,7 +81,7 @@ def response(resp): pass if len(content) > 300: - content = content[0:300] + "..." + content = content[0:300] + "..." # TODO: center snippet on query term res_dict = {'url': url, diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 047f1ba77c0c82d52826b7370576f05ea5f4fd69..81cf262c50c5bd775f113fffa64e606f5d1100a6 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -374,7 +374,7 @@ def add_url(urls, result, property_id=None, default_label=None, url_prefix=None, # wiki links don't have property in wikidata page if link_type and 'wiki' in link_type: - links.append(get_wikilink(result, link_type)) + links.append(get_wikilink(result, link_type)) else: dom_element = result.xpath(property_xpath.replace('{propertyid}', property_id)) if dom_element: diff --git a/searx/query.py b/searx/query.py index 89fe0607b584663034690725cb2f692fc2d55e3e..1dde7f329bd61653b395ac4fd1ca4690347214a6 100644 --- a/searx/query.py +++ b/searx/query.py @@ -22,7 +22,6 @@ from searx.engines import ( categories, engines, engine_shortcuts ) import re -import sys VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') @@ -78,20 +77,21 @@ class RawTextQuery(object): # if correct language-code is found # set it as new search-language - if (lang == lang_id - or lang == lang_name - or lang == english_name - or lang.replace('-', ' ') == country)\ - and lang not in self.languages: - parse_next = True - lang_parts = lang_id.split('-') - if len(lang_parts) == 2: - self.languages.append(lang_parts[0] + '-' + lang_parts[1].upper()) - else: - self.languages.append(lang_id) - # to ensure best match (first match is not necessarily the best one) - if lang == lang_id: - break + if ( + lang in [lang_id, lang_name, english_name] + or lang.replace("-", " ") == country # noqa + ) and lang not in self.languages: + parse_next = True + lang_parts = lang_id.split("-") + if len(lang_parts) == 2: + self.languages.append( + lang_parts[0] + "-" + lang_parts[1].upper() + ) + else: + self.languages.append(lang_id) + # to ensure best match (first match is not necessarily the best one) + if lang == lang_id: + break # user may set a valid, yet not selectable language if VALID_LANGUAGE_CODE.match(lang): diff --git a/searx/utils.py b/searx/utils.py index d1a28ea249563043eedb2d6de3eb62e0c2744ef3..a48dc36bf18c08839084ba088d92e56514b2adf0 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -3,15 +3,15 @@ import hashlib import hmac import os import re +from importlib.machinery import SourceFileLoader from babel.core import get_global from babel.dates import format_date from babel import UnknownLocaleError from codecs import getincrementalencoder -from imp import load_source from numbers import Number from os.path import splitext, join -from io import open +from pathlib import Path from random import choice import sys import json @@ -19,7 +19,6 @@ import json from searx import settings from searx.version import VERSION_STRING from searx.languages import language_codes -from searx import settings from searx import logger from io import StringIO @@ -31,8 +30,8 @@ logger = logger.getChild('utils') blocked_tags = ('script', 'style') -useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) - + "/data/useragents.json", 'r', encoding='utf-8').read()) +with open(Path(__file__).parent / "data" / "useragents.json", encoding='utf-8') as fd: + useragents = json.load(fd) def searx_useragent(): @@ -161,7 +160,7 @@ def get_resources_directory(searx_directory, subdirectory, resources_directory): if not resources_directory: resources_directory = os.path.join(searx_directory, subdirectory) if not os.path.isdir(resources_directory): - raise Exception(directory + " is not a directory") + raise Exception(resources_directory + " is not a directory") return resources_directory @@ -356,7 +355,7 @@ def load_module(filename, module_dir): if modname in sys.modules: del sys.modules[modname] filepath = join(module_dir, filename) - module = load_source(modname, filepath) + module = SourceFileLoader(modname, filepath).load_module() module.name = modname return module diff --git a/searx/webapp.py b/searx/webapp.py index 84f9a504edc215a4d9648510110205d5c57b4410..22f8eb143e5c0675ca1179f0a32d5b9f4b9156a7 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,17 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' -if __name__ == '__main__': - from sys import path - from os.path import realpath, dirname - - path.append(realpath(dirname(realpath(__file__)) + '/../')) - import hashlib import hmac import json import os -import sys import time import copy @@ -35,21 +28,15 @@ import requests from searx import logger -logger = logger.getChild('webapp') -try: - from pygments import highlight - from pygments.lexers import get_lexer_by_name - from pygments.util import ClassNotFound - from pygments.formatters import HtmlFormatter -except ImportError: - logger.critical("cannot import dependency: pygments") - from sys import exit +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.util import ClassNotFound +from pygments.formatters import HtmlFormatter - exit(1) from html import escape from datetime import datetime, timedelta -from werkzeug.contrib.fixers import ProxyFix +from werkzeug.middleware.proxy_fix import ProxyFix from flask import ( Flask, request, render_template, url_for, Response, make_response, redirect, send_from_directory @@ -80,19 +67,11 @@ from searx.url_utils import urlencode, urlparse, urljoin from searx.utils import new_hmac import threading -# check if the pyopenssl package is installed. -# It is needed for SSL connection without trouble, see #298 -try: - import OpenSSL.SSL # NOQA -except ImportError: - logger.critical("The pyopenssl package has to be installed.\n" - "Some HTTPS connections will fail") - -from io import StringIO - # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler +logger = logger.getChild('webapp') + WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) # about static @@ -319,10 +298,11 @@ def render(template_name, override_theme=None, **kwargs): if 'categories' not in kwargs: kwargs['categories'] = ['general'] - kwargs['categories'].extend(x for x in - sorted(categories.keys()) - if x != 'general' - and x in enabled_categories) + kwargs["categories"].extend( + x + for x in sorted(categories.keys()) + if x != "general" and x in enabled_categories + ) if 'all_categories' not in kwargs: kwargs['all_categories'] = ['general'] @@ -428,7 +408,7 @@ def pre_request(): else: try: preferences.parse_dict(request.form) - except Exception as e: + except Exception: logger.exception('invalid settings') request.errors.append(gettext('Invalid settings')) @@ -444,8 +424,9 @@ def pre_request(): allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: - if ((plugin.default_on and plugin.id not in disabled_plugins) - or plugin.id in allowed_plugins): + if ( + plugin.default_on and plugin.id not in disabled_plugins + ) or plugin.id in allowed_plugins: request.user_plugins.append(plugin) @@ -644,7 +625,6 @@ def preferences(): # render preferences image_proxy = request.preferences.get_value('image_proxy') - lang = request.preferences.get_value('language') disabled_engines = request.preferences.engines.get_disabled() allowed_plugins = request.preferences.plugins.get_enabled() @@ -690,11 +670,13 @@ def preferences(): def _is_selected_language_supported(engine, preferences): - language = preferences.get_value('language') - return (language == 'all' - or match_language(language, - getattr(engine, 'supported_languages', []), - getattr(engine, 'language_aliases', {}), None)) + language = preferences.get_value("language") + return language == "all" or match_language( + language, + getattr(engine, "supported_languages", []), + getattr(engine, "language_aliases", {}), + None, + ) @app.route('/image_proxy', methods=['GET'])