diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 96c4c67cb7f6958bceda8e6fd3ad8dfaad41f010..537afb85627cf8c8eaf3217b2018b6eb5310b333 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,6 +3,7 @@ image: $CI_REGISTRY_IMAGE/env:latest
 stages:
   - build
   - test
+  - report

 front-end:
   stage: build
@@ -21,10 +22,40 @@ coding-rules:
   script:
     - ./manage.sh pep8_check

-unittest:
+unit-test:
   stage: test
   before_script:
     - ./manage.sh update_dev_packages
   script:
     - ./manage.sh unit_tests
+  artifacts:
+    paths:
+      - coverage
+    expire_in: 1 hour
+
+functional-test:
+  stage: test
+  image: docker:stable
+  services:
+    - docker:dind
+  variables:
+    DOCKER_HOST: tcp://docker:2375/
+    DOCKER_DRIVER: overlay2
+  before_script:
+    - docker run -id --rm -v $(pwd):/ws -e DOCKER_HOST=tcp://$(cat /etc/hosts | grep docker | cut -f1):2375/ -w /ws --name spotenv $CI_REGISTRY_IMAGE/env:latest sh
+    - docker exec -i spotenv ./manage.sh update_dev_packages
+  script:
+    - docker exec -i spotenv ./manage.sh functional_tests
+  artifacts:
+    paths:
+      - coverage
+    expire_in: 1 hour
+
+coverage:
+  stage: report
+  script:
+    - ./manage.sh coverage
+  dependencies:
+    - unit-test
+    - functional-test
   coverage: '/TOTAL.*\s+(\d+%)$/'
diff --git a/Dockerfile b/Dockerfile
index 6c0f11f862d1da487082e28fbae683e6a5062308..49981f1d264ab4f2077ad2909413479570ad5548 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,8 +6,6 @@ EXPOSE 8888
 WORKDIR /usr/local/searx
 CMD ["python", "searx/webapp.py"]

-RUN adduser -D -h /usr/local/searx -s /bin/sh searx searx
-
 COPY requirements.txt ./requirements.txt

 RUN apk -U add \
@@ -22,6 +20,7 @@ RUN apk -U add \
     ca-certificates \
     && pip install --upgrade pip \
     && pip install --no-cache -r requirements.txt \
+    && pip install --no-cache coverage \
     && apk del \
     build-base \
     libffi-dev \
@@ -32,11 +31,7 @@ RUN apk -U add \
     ca-certificates \
     && rm -f /var/cache/apk/*

-COPY . .
-
-RUN chown -R searx:searx *
-
-USER searx
+COPY searx /usr/local/searx/searx

 RUN sed -i "s/127.0.0.1/0.0.0.0/g" searx/settings.yml
diff --git a/Dockerfile.env b/Dockerfile.env
index 23ab4ec191a236746422d8539696fa5a1861d065..964dcbc5cac97d1af766e9949cd4d22933ff90a3 100644
--- a/Dockerfile.env
+++ b/Dockerfile.env
@@ -6,6 +6,7 @@ RUN dnf install -y\
     wget\
     python2-pip\
     npm\
+    docker \
     && dnf groupinstall -y "Development Tools" \
     && pip3 install ipdb ipython \
     && pip3 install -r /requirements.txt \
diff --git a/README.rst b/README.rst
index 371d9fbed24518190af48858a1cc9de5ef79e48a..970e2fe422c2a30518048220c7f8fab88d24d0f5 100644
--- a/README.rst
+++ b/README.rst
@@ -20,6 +20,7 @@ with one command.

 - Run the docker-compose **up** command to start the project ``docker-compose up --build``
 - Getting the ip of the spot service and go to http://:8888
+- Or you can query it from the command line: ``curl -X POST -F 'category=general' -F 'language=en-US' -F 'q=lequipe' -F 'time_range=' -F 'output=json' http://:8888/``

 .. note:: Here the command to get the IP of the spot service ``docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' my-spot_spot_1``
diff --git a/docker-compose-coverage.yml b/docker-compose-coverage.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bd10ea3e48a6e89579fc2823c9d97417dd2561af
--- /dev/null
+++ b/docker-compose-coverage.yml
@@ -0,0 +1,18 @@
+version: '3.6'
+
+services:
+  spot:
+    entrypoint:
+      - coverage
+      - run
+      - --source=searx
+    command:
+      - searx/webapp.py
+    volumes:
+      - coverage:/coverage
+    environment:
+      COVERAGE_FILE: /coverage/func
+
+volumes:
+  coverage:
+    name: spot-coverage
diff --git a/manage.sh b/manage.sh
index 4bfd69a398dfe9fc632b418a16fbdf68c0cee56f..79fdd244e7e1ebed1e38c750e9b2d3caac6214d5 100755
--- a/manage.sh
+++ b/manage.sh
@@ -8,6 +8,7 @@ set -e
 # subshell
 PYTHONPATH="$BASE_DIR"
 SEARX_DIR="$BASE_DIR/searx"
+COV_DIR="$BASE_DIR/coverage"

 ACTION="$1"

@@ -35,25 +36,39 @@ pep8_check() {
     # ignored rules:
     # E402 module level import not at top of file
     # W503 line break before binary operator
-    pep8 --exclude=searx/static --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
+    # E722 do not use bare 'except'
+    pycodestyle --exclude=searx/static --max-line-length=120 --ignore "E402,W503,E722" "$SEARX_DIR" "$BASE_DIR/tests"
 }

 unit_tests() {
     echo '[!] Running unit tests'
-    PYTHONPATH="$BASE_DIR" pytest --cov=searx --disable-pytest-warnings "$BASE_DIR/tests/unit"
+    mkdir -p "$COV_DIR"
+    chmod a+w "$COV_DIR"
+    PYTHONPATH="$BASE_DIR" COVERAGE_FILE="$COV_DIR"/unit pytest --cov=searx "$BASE_DIR/tests/unit"
 }

-py_test_coverage() {
-    echo '[!] Running python test coverage'
-    PYTHONPATH="`pwd`" python3 -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
-    && coverage report \
-    && coverage html
+functional_tests() {
+    echo '[!] Running functional tests'
+    mkdir -p "$COV_DIR"
+    chmod a+w "$COV_DIR"
+    PYTHONPATH="$BASE_DIR" COMPOSE_FILE=docker-compose.yml:docker-compose-coverage.yml \
+        pytest "$BASE_DIR/tests/functional"
+    docker run -itd --rm --name tmp-vol -v spot-coverage:/coverage alpine
+    docker cp tmp-vol:/coverage/func $COV_DIR
+    docker stop tmp-vol
+}
+
+coverage() {
+    sed -i 's!/usr/local/searx!'$BASE_DIR'!g' "$COV_DIR"/func
+    coverage3 combine coverage/func coverage/unit
+    coverage3 report
 }

 tests() {
     set -e
     pep8_check
     unit_tests
+    functional_tests
     set +e
 }
@@ -128,7 +143,6 @@ Commands
 ------------------
     update_packages - Check & update production dependency changes
     update_dev_packages - Check & update development and production dependency changes
-    install_geckodriver - Download & install geckodriver if not already installed (required for robot_tests)
     npm_packages - Download & install npm dependencies (source manage.sh to update the PATH)

 Build
@@ -140,10 +154,9 @@ Commands
 Tests
 -----
     unit_tests - Run unit tests
+    functional_tests - Run functional tests
     pep8_check - Pep8 validation
-    robot_tests - Run selenium tests
-    tests - Run all python tests (pep8, unit, robot_tests)
-    py_test_coverage - Unit test coverage
+    tests - Run all python tests (pep8, unit, functional)

 "
 }
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000000000000000000000000000000000000..838879d3891422414879226e129b4c111a36d079
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = -s --dockerc-build --dockerc-attach-network --disable-pytest-warnings
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0929b0ea85f841445b3d5bb496f4114e517641da..1de575509df51a7ab65090584ed94374bb422a9a 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,6 +1,7 @@
 babel==2.3.4
 mock==2.0.0
-pep8==1.7.0
+pycodestyle==2.4.0
 mockredispy==2.9.3
 pytest==4.1.0
 pytest-cov==2.6.1
+pytest-dockerc==1.0.5
diff --git a/searx/__init__.py b/searx/__init__.py
index 0dd557b7f3a1aa77f47f85a3ac6e8d002d0fb2ce..63c7f9ffe3516259b773b6d3a8e53c574c3a4ba2 100644
--- a/searx/__init__.py
+++ b/searx/__init__.py
@@ -23,7 +23,7 @@ from io import open
 from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
 try:
     from yaml import load
-except:
+except ImportError:
     from sys import exit, stderr
     stderr.write('[E] install pyyaml\n')
     exit(2)
@@ -38,6 +38,7 @@ def check_settings_yml(file_name):
     else:
         return None

+
 # find location of settings.yml
 if 'SEARX_SETTINGS_PATH' in environ:
     # if possible set path to settings using the
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 876011f1d236d28f5ca2ac4c9de624801f869269..6d769577a0f7e644aa7d2e27cbd43531c94bbde7 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -116,7 +116,7 @@ def _fetch_supported_languages(resp):
     regions = dom.xpath(regions_xpath)
     for region in regions:
-        code = re.search('setmkt=[^\&]+', region).group()[7:]
+        code = re.search('setmkt=[^&]+', region).group()[7:]
         if code == 'nb-NO':
             code = 'no-NO'
diff --git a/searx/engines/genius.py b/searx/engines/genius.py
index b265e9d7699d9406f42fd734f17b6a3fbac7e5da..aa5afad9b6ad2454187dbc35ca768c9bc0b96bf7 100644
--- a/searx/engines/genius.py
+++ b/searx/engines/genius.py
@@ -72,6 +72,7 @@ def parse_album(hit):
         result.update({'content': 'Released: {}'.format(year)})
     return result

+
 parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album}
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 180e8fc09df6c50379e00aed2bb3296ad8d1b867..fc79d8ea46618a8a0407773fc7cf0058db181b8a 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -89,9 +89,8 @@ url_map = 'https://www.openstreetmap.org/'\

 # search-url
 search_path = '/search'
-search_url = ('https://{hostname}' +
-              search_path +
-              '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')
+search_url = ('https://{hostname}' + search_path + '?{query}'
+              '&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')

 time_range_search = "&tbs=qdr:{range}"
 time_range_dict = {'day': 'd',
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 9a41b2dfa366260001a1877a64f4c3fc50880bb8..4274f0d07991218d8ee89d34950622f932d42eca 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -76,11 +76,11 @@ def response(resp):
         # get thumbnails
         script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
         id = result.xpath('.//div[@class="s"]//img/@id')[0]
-        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+        thumbnails_data = re.findall(r's=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
                                      script)
         tmp = []
         if len(thumbnails_data) != 0:
-            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+            tmp = re.findall(r'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
         thumbnail = ''
         if len(tmp) != 0:
             thumbnail = tmp[-1]
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index f2fafec3a62a82d2e5ba4a236b29700a3a8bd98a..17b32ec615c9ae2137ad53f030cce18f52f3ce6f 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -28,10 +28,10 @@ search_string = '?{query}&page={page}'
 supported_languages_url = base_url

 # regex
-regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile('^initialData:\s*')
-regex_json_remove_end = re.compile(',\s*environment$')
-regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(r'^initialData:\s*')
+regex_json_remove_end = re.compile(r',\s*environment$')
+regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')

 # do search-request
diff --git a/searx/query.py b/searx/query.py
index fe44496c65427c84da014ba209a555ecf0f83023..fdba2e0923b59a03e5b32f7e3432f023eb8a136d 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -49,7 +49,7 @@ class RawTextQuery(object):
         self.query_parts = []

         # split query, including whitespaces
-        raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else b'(\s+)', self.query)
+        raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else r'(\s+)'.encode(), self.query)

         parse_next = True
diff --git a/searx/results.py b/searx/results.py
index cde7be1212dc460f52190f85db54da7a9f34d4e7..6c5b52eaf823d22eb90881db39df0c5827840a51 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -171,7 +171,7 @@ class ResultContainer(object):
                 continue
             try:
                 result['url'] = result['url']
-            except:
+            except KeyError:
                 pass
             if 'title' in result and not isinstance(result['title'], str):
                 continue
diff --git a/searx/utils.py b/searx/utils.py
index c59ceca9e75915c2d86b0d8c16a0ea1b1e87167b..d1a28ea249563043eedb2d6de3eb62e0c2744ef3 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -6,6 +6,7 @@ import re

 from babel.core import get_global
 from babel.dates import format_date
+from babel import UnknownLocaleError
 from codecs import getincrementalencoder
 from imp import load_source
 from numbers import Number
@@ -205,7 +206,7 @@ def format_date_by_locale(date, locale_string):
     # to avoid crashing if locale is not supported by babel
     try:
         formatted_date = format_date(date, locale=locale_string)
-    except:
+    except UnknownLocaleError:
         formatted_date = format_date(date, "YYYY-MM-dd")

     return formatted_date
@@ -255,7 +256,7 @@ def get_torrent_size(filesize, filesize_multiplier):
             filesize = int(filesize * 1000 * 1000)
         elif filesize_multiplier == 'KiB':
             filesize = int(filesize * 1000)
-    except:
+    except ValueError:
         filesize = None

     return filesize
diff --git a/searx/webapp.py b/searx/webapp.py
index ef7615fbf88ecdc4594aed5e9a6605d753c52f43..a6525a19e1acb09b97d2b51c6909dc082fb35dfd 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -40,8 +40,9 @@ logger = logger.getChild('webapp')
 try:
     from pygments import highlight
     from pygments.lexers import get_lexer_by_name
+    from pygments.util import ClassNotFound
     from pygments.formatters import HtmlFormatter
-except:
+except ImportError:
     logger.critical("cannot import dependency: pygments")
     from sys import exit

@@ -178,7 +179,7 @@ def code_highlighter(codelines, language=None):
     try:
         # find lexer by programing language
         lexer = get_lexer_by_name(language, stripall=True)
-    except:
+    except ClassNotFound:
         # if lexer is not found, using default one
         logger.debug('highlighter cannot find lexer for {0}'.format(language))
         lexer = get_lexer_by_name('text', stripall=True)
@@ -452,21 +453,26 @@ def config_results(results, query):
             result['publishedDate'] = format_date(publishedDate)


-def index_error():
-    request.errors.append(gettext('search error'))
-    return render(
-        'index.html',
-    )
+def index_error(exn, output):
+    user_error = gettext("search error")
+    if output == "json":
+        return jsonify({"error": f"{user_error}: {exn}"})
+
+    request.errors.append(user_error)
+    return render('index.html')


 @app.route('/search', methods=['GET', 'POST'])
 @app.route('/', methods=['GET', 'POST'])
 def index():
+    # check the response format
+    output = request.form.get("output", "html")
+
     # check if there is query
     if request.form.get('q') is None:
-        return render(
-            'index.html',
-        )
+        if output == 'json':
+            return jsonify({}), 204
+        return render('index.html')

     selected_category = request.form.get('category') or 'general'
     first_page = request.form.get('pageno')
@@ -489,9 +495,9 @@ def index():
         # is it an invalid input parameter or something else ?
         if issubclass(e.__class__, SearxParameterException):
-            return index_error(), 400
+            return index_error(e, output), 400
         else:
-            return index_error(), 500
+            return index_error(e, output), 500

     if is_general_first_page:
         result_copy = copy.copy(search_data.results)
@@ -512,8 +518,7 @@ def index():
     config_results(images, search_data.query)
     config_results(videos, search_data.query)

-    return render(
-        'results.html',
+    response = dict(
         results=search_data.results,
         q=search_data.query,
         selected_category=selected_category,
@@ -521,12 +526,12 @@
         time_range=search_data.time_range,
         number_of_results=format_decimal(search_data.results_number),
         advanced_search=request.form.get('advanced_search', None),
-        suggestions=search_data.suggestions,
-        answers=search_data.answers,
-        corrections=search_data.corrections,
+        suggestions=list(search_data.suggestions),
+        answers=list(search_data.answers),
+        corrections=list(search_data.corrections),
         infoboxes=search_data.infoboxes,
         paging=search_data.paging,
-        unresponsive_engines=search_data.unresponsive_engines,
+        unresponsive_engines=list(search_data.unresponsive_engines),
         current_language=match_language(search_data.language,
                                         LANGUAGE_CODES,
                                         fallback=settings['search']['language']),
@@ -536,6 +541,9 @@
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]
     )
+    if output == 'json':
+        return jsonify(response)
+    return render('results.html', **response)


 @app.route('/about', methods=['GET'])
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfade83ac302ceb8138139f4de95bd0117964731
--- /dev/null
+++ b/tests/functional/conftest.py
@@ -0,0 +1,33 @@
+import pytest
+import redis
+import requests
+
+from pytest_dockerc import Wait, Context
+
+
+class SpotContext(Context):
+    @property
+    def url(self):
+        addr = self.container_addr("spot")
+        port = self.container_port("spot")
+        return f"http://{addr}:{port}"
+
+    def wait_for_running_state(self):
+        Wait(ignored_exns=(requests.ConnectionError,))(lambda: requests.get(self.url))
+
+
+@pytest.fixture(scope="session")
+def ctx(dockerc, dockerc_logs):
+    context = SpotContext(dockerc)
+    context.wait_for_running_state()
+    yield context
+
+
+@pytest.fixture
+def redisdb(ctx):
+    """ purge the db
+    """
+    db = redis.Redis(ctx.container_addr("redis"))
+    for key in db.keys():
+        db.delete(key)
+    yield db
diff --git a/tests/functional/test_api.py b/tests/functional/test_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..3211d65c765138d825593d79a4b49b8d32755a60
--- /dev/null
+++ b/tests/functional/test_api.py
@@ -0,0 +1,29 @@
+import requests
+
+
+class DataReq(dict):
+    def __init__(self, **kwargs):
+        self["category"] = kwargs.get("category", "general")
+        self["language"] = kwargs.get("language", "en-US")
+        self["q"] = kwargs.get("q", "lequipe.fr")
+        self["time_range"] = kwargs.get("time_range", "")
+        self["output"] = kwargs.get("output", "json")
+
+
+def test_index(ctx, redisdb):
+    """ Test the main endpoint to ensure that some results are returned
+    """
+    res = requests.post(ctx.url)
+    assert res.status_code == 200
+
+    res = requests.post(ctx.url, data={"output": "json"})
+    assert res.status_code == 204
+
+    data = DataReq()
+    res = requests.post(ctx.url, data=data)
+    assert res.status_code == 200
+
+    response = res.json()
+    assert len(response["results"]) > 5
+    assert len(response["image_results"]) == 5
+    assert len(response['videos_results']) == 5
diff --git a/tests/unit/engines/test_currency_convert.py b/tests/unit/engines/test_currency_convert.py
index dee74511027832beb1db2822353ed6ab0345dae4..ae7aa88b01bc9452b8dfd3f0d85661b18a48e86c 100644
--- a/tests/unit/engines/test_currency_convert.py
+++ b/tests/unit/engines/test_currency_convert.py
@@ -48,8 +48,11 @@ class TestCurrencyConvertEngine(TestCase):
         results = currency_convert.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['answer'], '10.0 GBP = 5.0 USD, 1 GBP (pound sterling)' +
-                         ' = 0.5 USD (United States dollar)')
+        self.assertEqual(
+            results[0]['answer'],
+            ('10.0 GBP = 5.0 USD, 1 GBP (pound sterling)'
+             ' = 0.5 USD (United States dollar)')
+        )
         target_url = 'https://duckduckgo.com/js/spice/currency/1/{}/{}'.format(
             dicto['from'], dicto['to'])
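
Note (not part of the patch): a quick way to exercise the new JSON output by hand is a small requests script. The sketch below assumes the docker-compose stack is already running and that SPOT_URL points at the spot container (for example the address returned by the docker inspect command shown in the README); the form fields mirror the README curl example and tests/functional/test_api.py, and the hypothetical default of http://127.0.0.1:8888 only applies if the port is reachable from the host.

# Minimal sketch, not part of the patch: query the new JSON API by hand.
# Assumptions: the compose stack is up, and SPOT_URL points at the spot
# container (see the README's docker inspect command).
import os

import requests

SPOT_URL = os.environ.get("SPOT_URL", "http://127.0.0.1:8888")

form = {
    "category": "general",
    "language": "en-US",
    "q": "lequipe",
    "time_range": "",
    "output": "json",  # makes webapp.index() return jsonify(response) instead of HTML
}

res = requests.post(SPOT_URL, data=form)
res.raise_for_status()

data = res.json()
# The JSON keys mirror the keyword arguments previously passed to
# render('results.html', ...): results, suggestions, answers, corrections,
# unresponsive_engines, paging, and so on.
for result in data["results"]:
    print(result.get("title"), "->", result.get("url"))
print("unresponsive engines:", data["unresponsive_engines"])

The empty-query case (HTTP 204 with an empty JSON body) and the per-category keys such as image_results and videos_results are already covered by tests/functional/test_api.py, so this script only inspects the general results.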