...
 
Commits (154)
# Codecov configuration: no PR comments; project and patch statuses use the
# same settings.
comment: false

coverage:
  status:
    project:
      default:
        # basic
        target: auto
        threshold: null
        base: auto
        # advanced
        branches: null
        if_no_uploads: error
        if_not_found: success
        if_ci_failed: error
        only_pulls: false
        flags: null
        paths: null
    patch:
      default:
        # basic
        target: auto
        threshold: null
        base: auto
        # advanced
        branches: null
        if_no_uploads: error
        if_not_found: success
        if_ci_failed: error
        only_pulls: false
        flags: null
        paths: null
# coverage.py settings. Multi-line values must be indented so the parser
# reads them as continuations of the preceding key.
[run]
branch = True
source =
    searx/engines
    searx/__init__.py
    searx/autocomplete.py
    searx/https_rewrite.py
    searx/languages.py
    searx/search.py
    searx/testing.py
    searx/utils.py
    searx/webapp.py

[report]
show_missing = True
# Regexes; matching lines are left out of the report.
exclude_lines =
    if __name__ == .__main__.:

[html]
directory = coverage
......@@ -15,5 +15,11 @@ setup.cfg
*~
node_modules/
yarn.lock
yarn-error.log
.tx/
.vscode
.idea
dump.rdb
# GitLab CI pipeline: check -> build -> test -> report -> deploy.
# Jobs run in the project's `env` image unless they set their own `image`.
image: $CI_REGISTRY_IMAGE/env:latest

stages:
  - check
  - build
  - test
  - report
  - deploy

# Code style check.
python:
  stage: check
  before_script:
    - ./manage.sh update_dev_packages
  script:
    - ./manage.sh pep8_check

# Locales, styles and grunt bundles.
build:web:
  stage: build
  before_script:
    - ./manage.sh npm_packages
    - ./manage.sh update_dev_packages
  script:
    - ./manage.sh locales
    - ./manage.sh styles
    - ./manage.sh grunt_build

# Builds and pushes the image, tagged with the commit tag or `latest`.
build:docker:
  stage: build
  before_script:
    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
  script:
    - docker build -t $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest} .
    - docker push $CI_REGISTRY_IMAGE:${CI_COMMIT_TAG:-latest}
  only:
    - master
    - tags

# Unit tests; the `coverage` directory is kept for the report stage.
test:unit:
  stage: test
  before_script:
    - ./manage.sh update_dev_packages
  script:
    - ./manage.sh unit_tests
  artifacts:
    paths:
      - coverage
    expire_in: 1 hour

# Functional tests, run from a `spotenv` container started against the
# docker:dind service.
test:functional:
  stage: test
  image: docker:stable
  services:
    - docker:dind
  variables:
    DOCKER_HOST: tcp://docker:2375/
    DOCKER_DRIVER: overlay2
  before_script:
    - docker run -id --rm -v $(pwd):/ws -e DOCKER_HOST=tcp://$(cat /etc/hosts | grep docker | cut -f1):2375/ -w /ws --name spotenv $CI_REGISTRY_IMAGE/env:latest sh
    - docker exec -i spotenv ./manage.sh update_dev_packages
  script:
    - docker exec -i spotenv ./manage.sh functional_tests
  artifacts:
    paths:
      - coverage
    expire_in: 1 hour

# Combines the coverage artifacts of both test jobs; the regex below extracts
# the total percentage from the job log.
coverage:
  stage: report
  script:
    - ./manage.sh coverage
  dependencies:
    - test:unit
    - test:functional
  coverage: '/TOTAL.*\s+(\d+%)$/'

# Manual deployment of any branch to the test host over SSH.
deploy:test:
  image: docker:stable
  stage: deploy
  only:
    - branches
  when: manual
  variables:
    PUBLISH_USER: root
    PUBLISH_URL: spot.test.ecloud.global
    GIT_STRATEGY: none
    SPOT_HOSTNAME: spot.test.ecloud.global
  dependencies: []
  before_script:
    - 'which ssh-agent || ( apk --update add openssh-client )'
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY_TEST" | tr -d '\r' | ssh-add - > /dev/null
    - mkdir -p ~/.ssh
    - chmod 700 ~/.ssh
    - echo "$SSH_KNOWN_HOSTS" > ~/.ssh/known_hosts
    - chmod 644 ~/.ssh/known_hosts
  script:
    # NOTE(review): in `--net my-network my-spot_default ...` the second word
    # is parsed by docker as the image name, and the `elif` starts the proxy
    # when NO exited container is found - confirm both are intended.
    - ssh -2 $PUBLISH_USER@$PUBLISH_URL 'if [ ! "$(docker ps -q -f name=proxy)" ] ; then docker run -d -p 80:80 --net my-network my-spot_default -v /var/run/docker.sock:/tmp/docker.sock:ro --restart unless-stopped --name proxy jwilder/nginx-proxy ; elif [ ! "$(docker ps -q -f name=proxy -f status=exited)" ] ; then docker start proxy ; fi'
    - ssh -2 $PUBLISH_USER@$PUBLISH_URL "cd /root/my-spot/ && git fetch && git checkout $CI_COMMIT_SHA"
    - ssh -2 $PUBLISH_USER@$PUBLISH_URL "SPOT_HOSTNAME=$SPOT_HOSTNAME && export SPOT_HOSTNAME && cd /root/my-spot/ && docker-compose pull && docker-compose up -d --build --force-recreate"
Spot for /e/ (e.foundation) was forked from Searx https://github.com/asciimoo/searx
Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi.
Major contributing authors:
......@@ -98,3 +100,11 @@ generally made searx better:
- @ZEROF
- Ivan Skytte Jørgensen @isj-privacore
- @miicha
People who contributed to Spot:
- Johnny Kalajdzic
- Daniel Ramirez Martinez for e Foundation
- Olivier Deckmyn
- Omer Akram
FROM alpine:3.8
FROM python:3.7-alpine as builder
RUN apk add \
git \
build-base \
libxml2-dev \
libxslt-dev \
libffi-dev \
hiredis
# Only to use the docker cache and optimize the build time
WORKDIR /src
COPY requirements.txt /src/requirements.txt
RUN pip3 install --prefix /install -r requirements.txt
COPY . /src/
RUN PYTHONPATH=/install/lib/python3.7/site-packages/ python3 setup.py install --prefix /install
FROM python:3.7-alpine
LABEL maintainer="searx <https://github.com/asciimoo/searx>"
LABEL description="A privacy-respecting, hackable metasearch engine."
ENV BASE_URL=False IMAGE_PROXY=False HTTP_PROXY_URL= HTTPS_PROXY_URL=
RUN apk add \
ca-certificates \
libxslt \
&& pip install coverage
COPY --from=builder /install/ /usr/local/
EXPOSE 8888
WORKDIR /usr/local/searx
CMD ["/sbin/tini","--","/usr/local/searx/run.sh"]
RUN adduser -D -h /usr/local/searx -s /bin/sh searx searx \
&& echo '#!/bin/sh' >> run.sh \
&& echo 'sed -i "s|base_url : False|base_url : $BASE_URL|g" searx/settings.yml' >> run.sh \
&& echo 'sed -i "s/image_proxy : False/image_proxy : $IMAGE_PROXY/g" searx/settings.yml' >> run.sh \
&& echo 'sed -i "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml' >> run.sh \
&& echo 'if [ -n "$HTTP_PROXY_URL" ] || [ -n "$HTTPS_PROXY_URL" ]; then' >> run.sh \
&& echo ' sed -i "s~^# proxies :~ proxies:\\n http: ${HTTP_PROXY_URL}\\n https: ${HTTPS_PROXY_URL}\\n~" searx/settings.yml' >> run.sh \
&& echo 'fi' >> run.sh \
&& echo 'python searx/webapp.py' >> run.sh \
&& chmod +x run.sh
COPY requirements.txt ./requirements.txt
RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories \
&& apk -U add \
build-base \
python \
python-dev \
py-pip \
libxml2 \
libxml2-dev \
libxslt \
libxslt-dev \
libffi-dev \
openssl \
openssl-dev \
ca-certificates \
tini@commuedge \
&& pip install --upgrade pip \
&& pip install --no-cache -r requirements.txt \
&& apk del \
build-base \
python-dev \
libffi-dev \
openssl-dev \
libxslt-dev \
libxml2-dev \
openssl-dev \
ca-certificates \
&& rm -f /var/cache/apk/*
COPY . .
RUN chown -R searx:searx *
USER searx
RUN sed -i "s/127.0.0.1/0.0.0.0/g" searx/settings.yml
STOPSIGNAL SIGINT
CMD ["searx-run"]
# Tooling image with the Python, npm and docker dependencies preinstalled
# (presumably the `env` image the CI jobs run in - confirm).
FROM fedora

COPY requirements.txt requirements-dev.txt /

# Every pip call below is pip3, so python3-pip is installed explicitly next to
# the python2-pip package that was already listed. Each continuation line is
# indented so the package names stay separate shell words.
RUN dnf install -y \
        wget \
        python2-pip \
        python3-pip \
        npm \
        docker \
    && dnf groupinstall -y "Development Tools" \
    && pip3 install ipdb ipython \
    && pip3 install -r /requirements.txt \
    && pip3 install -r /requirements-dev.txt \
    && rm -f /requirements.txt /requirements-dev.txt
searx
=====
spot for /e/ (https://e.foundation)
===================================
A privacy-respecting, hackable `metasearch
engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
Pronunciation: səːks
Spot was forked from searx: read `documentation <https://asciimoo.github.io/searx>`__ and the `wiki <https://github.com/asciimoo/searx/wiki>`__ for more information.
List of `running
instances <https://github.com/asciimoo/searx/wiki/Searx-instances>`__.
Spot is based on Python3.7+ and asyncio.
See the `documentation <https://asciimoo.github.io/searx>`__ and the `wiki <https://github.com/asciimoo/searx/wiki>`__ for more information.
Getting Started
~~~~~~~~~~~~~~~
|OpenCollective searx backers|
|OpenCollective searx sponsors|
You can run spot with docker-compose to run the **redis** database and
the **spot** service. First of all you have to install **docker** and
**docker-compose** on your host, then follow instructions below to run spot
with one command.
Installation
~~~~~~~~~~~~
- Run the docker-compose **up** command to start the project ``docker-compose up --build``
- Get the IP of the spot service and go to http://<spot-ip>:8888
- clone source:
``git clone https://github.com/asciimoo/searx.git && cd searx``
- install dependencies: ``./manage.sh update_packages``
- edit your
`settings.yml <https://github.com/asciimoo/searx/blob/master/searx/settings.yml>`__
(set your ``secret_key``!)
- run ``python searx/webapp.py`` to start the application
.. note:: Here is the command to get the IP of the spot service
``docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' my-spot_spot_1``
For all the details, follow this `step by step
You can also install **redis** and **spot** directly on your host. For all the details, follow this `step by step
installation <https://github.com/asciimoo/searx/wiki/Installation>`__.
Developer mode
~~~~~~~~~~~~~~
First run the redis database:
- ``docker-compose up -d redis``
Then on spot workdir run the following commands to start spot:
- ``docker-compose build spot``
- ``docker-compose run --rm -v $(pwd):/ws -w /ws -e PYTHONPATH=/ws spot sh``
- ``python3 -X dev searx/webapp.py``
Run tests:
- ``docker run -it --rm -v $(pwd):/ws -w /ws -v /var/run/docker.sock:/var/run/docker.sock -e PYTHONPATH=/ws registry.gitlab.e.foundation:5000/e/cloud/my-spot/env bash``
- ``pytest --pdb --pdbcls IPython.terminal.debugger:TerminalPdb tests``
Bugs
~~~~
......@@ -43,11 +58,3 @@ More about searx
- `openhub <https://www.openhub.net/p/searx/>`__
- `twitter <https://twitter.com/Searx_engine>`__
- IRC: #searx @ freenode
.. |OpenCollective searx backers| image:: https://opencollective.com/searx/backers/badge.svg
:target: https://opencollective.com/searx#backer
.. |OpenCollective searx sponsors| image:: https://opencollective.com/searx/sponsors/badge.svg
:target: https://opencollective.com/searx#sponsor
# add tests
# static page only on dev mode
# remove usage of requests
# remove last use of threading
# Compose override: starts the spot service under `coverage run` and stores
# the data file in the named `spot-coverage` volume.
version: '3.6'

services:
  spot:
    entrypoint:
      - coverage
      - run
      - --source=searx
    command:
      - /usr/local/bin/searx-run
    volumes:
      - coverage:/coverage
    environment:
      COVERAGE_FILE: /coverage/func

volumes:
  coverage:
    name: spot-coverage
# Base compose file: a redis instance plus the spot service built from this
# repository.
version: '3.6'

services:
  redis:
    image: redis:5-alpine

  spot:
    build: .
    environment:
      SEARX_REDIS_HOST: redis
      # Falls back to `spot` when SPOT_HOSTNAME is not set.
      VIRTUAL_HOST: "${SPOT_HOSTNAME:-spot}"
      SEARX_LOGGER: INFO
......@@ -8,6 +8,7 @@ set -e
# subshell
PYTHONPATH="$BASE_DIR"
SEARX_DIR="$BASE_DIR/searx"
COV_DIR="$BASE_DIR/coverage"
ACTION="$1"
......@@ -16,54 +17,14 @@ ACTION="$1"
#
update_packages() {
pip install --upgrade pip
pip install --upgrade setuptools
pip install -r "$BASE_DIR/requirements.txt"
pip3 install --upgrade pip
pip3 install --upgrade setuptools
pip3 install -r "$BASE_DIR/requirements.txt"
}
update_dev_packages() {
update_packages
pip install -r "$BASE_DIR/requirements-dev.txt"
}
install_geckodriver() {
echo '[!] Checking geckodriver'
# TODO : check the current geckodriver version
set -e
geckodriver -V > /dev/null 2>&1 || NOTFOUND=1
set +e
if [ -z "$NOTFOUND" ]; then
return
fi
GECKODRIVER_VERSION="v0.19.1"
PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
case "$PLATFORM" in
"linux 32bit" | "linux2 32bit") ARCH="linux32";;
"linux 64bit" | "linux2 64bit") ARCH="linux64";;
"windows 32 bit") ARCH="win32";;
"windows 64 bit") ARCH="win64";;
"mac 64bit") ARCH="macos";;
esac
GECKODRIVER_URL="https://github.com/mozilla/geckodriver/releases/download/$GECKODRIVER_VERSION/geckodriver-$GECKODRIVER_VERSION-$ARCH.tar.gz";
if [ -z "$1" ]; then
if [ -z "$VIRTUAL_ENV" ]; then
printf "geckodriver can't be installed because VIRTUAL_ENV is not set, you should download it from\n %s" "$GECKODRIVER_URL"
exit
else
GECKODRIVER_DIR="$VIRTUAL_ENV/bin"
fi
else
GECKODRIVER_DIR="$1"
mkdir -p -- "$GECKODRIVER_DIR"
fi
printf "Installing %s/geckodriver from\n %s" "$GECKODRIVER_DIR" "$GECKODRIVER_URL"
FILE="`mktemp`"
wget -qO "$FILE" -- "$GECKODRIVER_URL" && tar xz -C "$GECKODRIVER_DIR" -f "$FILE" geckodriver
rm -- "$FILE"
chmod 777 -- "$GECKODRIVER_DIR/geckodriver"
pip3 install -r "$BASE_DIR/requirements-dev.txt"
}
locales() {
......@@ -75,32 +36,40 @@ pep8_check() {
# ignored rules:
# E402 module level import not at top of file
# W503 line break before binary operator
pep8 --exclude=searx/static --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
# E722 do not use bare 'except'
pycodestyle --exclude=searx/static --max-line-length=120 --ignore "E402,W503,E722" "$SEARX_DIR" "$BASE_DIR/tests"
flake8 --ignore=E722 $SEARX_DIR/*.py
}
unit_tests() {
echo '[!] Running unit tests'
python -m nose2 -s "$BASE_DIR/tests/unit"
mkdir -p "$COV_DIR"
chmod a+w "$COV_DIR"
PYTHONPATH="$BASE_DIR" COVERAGE_FILE="$COV_DIR"/unit pytest --cov=searx "$BASE_DIR/tests/unit"
}
py_test_coverage() {
echo '[!] Running python test coverage'
PYTHONPATH="`pwd`" python -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
&& coverage report \
&& coverage html
# Run the functional test suite and collect its coverage data file.
#
# pytest is started with both compose files listed in COMPOSE_FILE. Once the
# tests are done, a throw-away container mounting the `spot-coverage` volume
# is used to copy the `func` data file into $COV_DIR.
# Returns the exit status of the copy.
functional_tests() {
    echo '[!] Running functional tests'
    mkdir -p "$COV_DIR"
    chmod a+w "$COV_DIR"
    PYTHONPATH="$BASE_DIR" COMPOSE_FILE=docker-compose.yml:docker-compose-coverage.yml \
        pytest "$BASE_DIR/tests/functional"
    docker run -itd --rm --name tmp-vol -v spot-coverage:/coverage alpine
    # Stop the helper container even when the copy fails, so a leftover
    # `tmp-vol` cannot make the next run fail on a name conflict.
    cp_status=0
    docker cp tmp-vol:/coverage/func "$COV_DIR" || cp_status=$?
    docker stop tmp-vol
    return "$cp_status"
}
robot_tests() {
echo '[!] Running robot tests'
PYTHONPATH="`pwd`" python "$SEARX_DIR/testing.py" robot
# Merge the unit and functional coverage data and print the combined report.
#
# The functional data file refers to searx under
# /usr/local/lib/python3.7/site-packages (presumably the install location
# inside the container); those paths are rewritten to $SEARX_DIR first so both
# data files point at the same source tree.
coverage() {
    # $SEARX_DIR is spliced into the sed expression inside double quotes so a
    # path containing whitespace stays a single argument.
    sed -i 's!/usr/local/lib/python3.7/site-packages/searx[^/]*/searx!'"$SEARX_DIR"'!g' "$COV_DIR/func"
    # Use $COV_DIR rather than a cwd-relative `coverage/` path, matching where
    # the test targets write their data files.
    coverage3 combine "$COV_DIR/func" "$COV_DIR/unit"
    coverage3 report
}
tests() {
set -e
pep8_check
unit_tests
install_geckodriver
robot_tests
functional_tests
set +e
}
......@@ -110,7 +79,7 @@ tests() {
#
npm_path_setup() {
which npm || (printf 'Error: npm is not found\n'; exit 1)
which npm &>/dev/null || whereis npm &>/dev/null || (printf 'Error: npm is not found\n'; exit 1)
export PATH="$(npm bin)":$PATH
}
......@@ -152,6 +121,7 @@ styles() {
}
grunt_build() {
npm_path_setup
echo '[!] Grunt build : oscar theme'
grunt --gruntfile "$SEARX_DIR/static/themes/oscar/gruntfile.js"
echo '[!] Grunt build : simple theme'
......@@ -174,7 +144,6 @@ Commands
------------------
update_packages - Check & update production dependency changes
update_dev_packages - Check & update development and production dependency changes
install_geckodriver - Download & install geckodriver if not already installed (required for robot_tests)
npm_packages - Download & install npm dependencies (source manage.sh to update the PATH)
Build
......@@ -186,10 +155,9 @@ Commands
Tests
-----
unit_tests - Run unit tests
functional_tests - Run functional tests
pep8_check - Pep8 validation
robot_tests - Run selenium tests
tests - Run all python tests (pep8, unit, robot_tests)
py_test_coverage - Unit test coverage
tests - Run all python tests (pep8, unit, functional)
"
}
......
[pytest]
addopts = -s --dockerc-build --dockerc-attach-network --disable-pytest-warnings
babel==2.3.4
mock==2.0.0
nose2[coverage-plugin]
cov-core==1.15.0
pep8==1.7.0
plone.testing==5.0.0
splinter==0.7.5
transifex-client==0.12.2
unittest2==1.1.0
zope.testrunner==4.5.1
selenium==3.5.0
pycodestyle==2.5.0
flake8==3.7.7
mockredispy==2.9.3
pytest==4.1.0
pytest-cov==2.6.1
pytest-dockerc==1.0.5
certifi==2018.11.29
flask-babel==0.12.2
flask==1.0.2
idna==2.8
jinja2==2.10
lxml==4.3.0
pygments==2.1.3
pyopenssl==19.0.0
python-dateutil==2.7.5
pyyaml==4.2b1
lxml==4.3.3
pygments==2.3.1
python-dateutil==2.8.0
pyyaml==5.1
requests[socks]==2.21.0
aioredis==1.2.0
aiohttp==3.5.4
cchardet==2.1.4
aiodns==2.0.0
aiohttp_jinja2==1.1.0
Babel==2.6.0
aiohttpbabel==0.0.7
......@@ -15,18 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import certifi
import logging
from os import environ
from os.path import realpath, dirname, join, abspath, isfile
from io import open
from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
try:
from yaml import safe_load
except:
from sys import exit, stderr
stderr.write('[E] install pyyaml\n')
exit(2)
import yaml
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
......@@ -38,6 +31,7 @@ def check_settings_yml(file_name):
else:
return None
# find location of settings.yml
if 'SEARX_SETTINGS_PATH' in environ:
# if possible set path to settings using the
......@@ -52,7 +46,11 @@ if not settings_path:
# load settings
with open(settings_path, 'r', encoding='utf-8') as settings_yaml:
    # The settings file only needs plain YAML data, so use the safe loader.
    # It is available in PyYAML both before and after 5.0 (no version switch
    # needed) and never builds arbitrary Python objects from tags in the file,
    # which a bare yaml.load() on PyYAML < 5 would do.
    settings = yaml.safe_load(settings_yaml)
'''
enable debug if
......@@ -65,29 +63,26 @@ the environnement variable SEARX_DEBUG is 0 or false
(whatever the value in settings.yml)
or general.debug=False in settings.yml
'''
searx_debug_env = environ.get('SEARX_DEBUG', '').lower()
if searx_debug_env == 'true' or searx_debug_env == '1':
searx_debug = True
elif searx_debug_env == 'false' or searx_debug_env == '0':
searx_debug = False
else:
searx_debug = settings.get('general', {}).get('debug')
if searx_debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
searx_debug = True if settings.get('general', {}).get('debug') else False
searx_loglevel = 'DEBUG' if searx_debug else 'WARNING'
searx_loglevel = environ.get('SEARX_LOGGER', searx_loglevel).upper()
logging.basicConfig(level=getattr(logging, searx_loglevel))
logger = logging.getLogger('searx')
logger.debug('read configuration from %s', settings_path)
# Workaround for openssl versions <1.0.2
# https://github.com/certifi/python-certifi/issues/26
if OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
if hasattr(certifi, 'old_where'):
environ['REQUESTS_CA_BUNDLE'] = certifi.old_where()
logger.warning('You are using an old openssl version({0}), please upgrade above 1.0.2!'.format(OPENSSL_VERSION))
logger.info('Initialisation done')
# Environment overrides for values loaded from settings.yml.
#
# Environment values are always strings. The Docker image declares
# BASE_URL=False, IMAGE_PROXY=False and empty HTTP(S)_PROXY_URL defaults, so
# "False" must become a real False (the string would be truthy) and empty
# proxy URLs must be ignored instead of being stored as proxies.
_ENV_FALSE_VALUES = ('', '0', 'false', 'no', 'off')

if 'SEARX_SECRET' in environ:
    settings['server']['secret_key'] = environ['SEARX_SECRET']
if 'BASE_URL' in environ:
    _env_base_url = environ['BASE_URL']
    if _env_base_url.strip().lower() in _ENV_FALSE_VALUES:
        settings['server']['base_url'] = False
    else:
        settings['server']['base_url'] = _env_base_url
if 'IMAGE_PROXY' in environ:
    settings['server']['image_proxy'] = environ['IMAGE_PROXY'].strip().lower() not in _ENV_FALSE_VALUES
if 'SEARX_REDIS_HOST' in environ:
    settings['redis']['enable'] = True
    settings['redis']['host'] = environ['SEARX_REDIS_HOST']
# Only non-empty proxy URLs are applied; the mapping is created on demand so
# a settings file without a `proxies` section does not raise KeyError.
if environ.get('HTTP_PROXY_URL'):
    settings.setdefault('proxies', {})['http'] = environ['HTTP_PROXY_URL']
if environ.get('HTTPS_PROXY_URL'):
    settings.setdefault('proxies', {})['https'] = environ['HTTPS_PROXY_URL']
from os import listdir
from os.path import realpath, dirname, join, isdir
from sys import version_info
from searx.utils import load_module
from collections import defaultdict
if version_info[0] == 3:
unicode = str
answerers_dir = dirname(realpath(__file__))
......@@ -34,12 +30,12 @@ def get_answerers_by_keywords(answerers):
def ask(query):
results = []
query_parts = list(filter(None, query.query.split()))
query_parts = list([_f for _f in query.query.split() if _f])
if query_parts[0].decode('utf-8') not in answerers_by_keywords:
if query_parts[0] not in answerers_by_keywords:
return results
for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
for answerer in answerers_by_keywords[query_parts[0]]:
result = answerer(query)
if result:
results.append(result)
......
......@@ -3,7 +3,7 @@ import random
import string
import sys
import uuid
from flask_babel import gettext
from gettext import gettext
# required answerer attribute
# specifies which search query keywords triggers this answerer
......@@ -11,11 +11,7 @@ keywords = ('random',)
random_int_max = 2**31
if sys.version_info[0] == 2:
random_string_letters = string.lowercase + string.digits + string.uppercase
else:
unicode = str
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_characters():
......@@ -24,32 +20,32 @@ def random_characters():
def random_string():
return u''.join(random_characters())
return ''.join(random_characters())
def random_float():
return unicode(random.random())
return str(random.random())
def random_int():
return unicode(random.randint(-random_int_max, random_int_max))
return str(random.randint(-random_int_max, random_int_max))
def random_sha256():
m = hashlib.sha256()
m.update(b''.join(random_characters()))
return unicode(m.hexdigest())
m.update(''.join(random_characters()).encode())
return m.hexdigest()
def random_uuid():
return unicode(uuid.uuid4())
return str(uuid.uuid4())
random_types = {b'string': random_string,
b'int': random_int,
b'float': random_float,
b'sha256': random_sha256,
b'uuid': random_uuid}
random_types = {'string': random_string,
'int': random_int,
'float': random_float,
'sha256': random_sha256,
'uuid': random_uuid}
# required answerer function
......@@ -70,4 +66,4 @@ def answer(query):
def self_info():
return {'name': gettext('Random value generator'),
'description': gettext('Generate different random values'),
'examples': [u'random {}'.format(x) for x in random_types]}
'examples': ['random {}'.format(x) for x in random_types]}
from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
if version_info[0] == 3:
unicode = str
from gettext import gettext
keywords = ('min',
'max',
......@@ -30,21 +26,21 @@ def answer(query):
func = parts[0]
answer = None
if func == b'min':
if func == 'min':
answer = min(args)
elif func == b'max':
elif func == 'max':
answer = max(args)
elif func == b'avg':
elif func == 'avg':
answer = sum(args) / len(args)
elif func == b'sum':
elif func == 'sum':
answer = sum(args)
elif func == b'prod':
elif func == 'prod':
answer = reduce(mul, args, 1)
if answer is None:
return []
return [{'answer': unicode(answer)}]
return [{'answer': answer}]
# required answerer function
......
......@@ -15,7 +15,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import requests
from lxml import etree
from json import loads
from searx import settings
......@@ -23,15 +23,13 @@ from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
from searx.poolrequests import get as http_get
from searx.url_utils import urlencode
def get(*args, **kwargs):
if 'timeout' not in kwargs:
kwargs['timeout'] = settings['outgoing']['request_timeout']
return http_get(*args, **kwargs)
return requests.get(*args, **kwargs)
def searx_bang(full_query):
......@@ -81,22 +79,22 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
lang_id, lang_name, country, english_name = map(unicode.lower, lc)
lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else:
results.append(u':{lang_id}'.format(lang_id=lang_id))
results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
results.append(u':{lang_name}'.format(lang_name=lang_name))
results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country
if country.startswith(engine_query.replace('_', ' ')):
results.append(u':{country}'.format(country=country.replace(' ', '_')))
results.append(':{country}'.format(country=country.replace(' ', '_')))
# remove duplicates
result_set = set(results)
......@@ -130,9 +128,10 @@ def duckduckgo(query, lang):
# duckduckgo autocompleter
url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
resp = loads(get(url.format(urlencode(dict(q=query)))).text)
if len(resp) > 1:
return resp[1]
resp = get(url.format(urlencode(dict(q=query))))
if resp.status_code == 200 and resp.text:
res = loads(resp.text)
return res[1] if len(res) == 2 else []
return []
......
......@@ -18,12 +18,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
import sys
import threading
from os.path import realpath, dirname
from io import open
import json
from pathlib import Path
from babel.localedata import locale_identifiers
from flask_babel import gettext
from gettext import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
......@@ -32,13 +31,14 @@ from searx.utils import load_module, match_language
logger = logger.getChild('engines')
engine_dir = dirname(realpath(__file__))
engine_dir = Path(__file__).parent
engines = {}
categories = {'general': []}
languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
with open(engine_dir.parent / "data" / "engines_languages.json", encoding='utf-8') as fd:
languages = json.load(fd)
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
......
......@@ -63,7 +63,7 @@ def response(resp):
except:
pass
# I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
content = u'Category: "{category}".'
content = 'Category: "{category}".'
content = content.format(category=category)
results.append({'url': href,
......
......@@ -105,7 +105,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
query += b' (' + main_langs[language] + b')'
query += ' (' + main_langs[language] + ')'
# prepare the request parameters
query = urlencode({'search': query})
......
......@@ -61,7 +61,7 @@ def response(resp):
content = content_string.format(doi_content="", abstract_content=abstract)
if len(content) > 300:
content = content[0:300] + "..."
content = content[0:300] + "..."
# TODO: center snippet on query term
publishedDate = datetime.strptime(entry.xpath('.//published')[0].text, '%Y-%m-%dT%H:%M:%SZ')
......
......@@ -39,7 +39,7 @@ def request(query, params):
else:
lang = match_language(params['language'], supported_languages, language_aliases)
query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
search_path = search_string.format(
query=urlencode({'q': query}),
......
......@@ -116,7 +116,7 @@ def _fetch_supported_languages(resp):
regions = dom.xpath(regions_xpath)
for region in regions:
code = re.search('setmkt=[^\&]+', region).group()[7:]
code = re.search('setmkt=[^&]+', region).group()[7:]
if code == 'nb-NO':
code = 'no-NO'
......
"""
Bing (Videos)
@website https://www.bing.com/videos
@provide-api yes (http://datamarket.azure.com/dataset/bing/search)
@using-api no
@results HTML
@stable no
@parse url, title, content, thumbnail
"""
from json import loads
from lxml import html
from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import match_language
......@@ -48,7 +44,8 @@ def request(query, params):
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
# language cookie
language = match_language(params['language'], supported_languages, language_aliases).lower()
# NOTE: supported_languages initiated dynamically
language = match_language(params['language'], supported_languages).lower()
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
# query and paging
......
import json
import re
import os
import sys
import unicodedata
from io import open
from pathlib import Path
from datetime import datetime
if sys.version_info[0] == 3:
unicode = str
categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
......@@ -90,10 +87,10 @@ def response(resp):
def load():
global db
current_dir = os.path.dirname(os.path.realpath(__file__))
json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
db = json.loads(json_data)
with open(
Path(__file__).parent.parent / "data" / "currencies.json", encoding='utf-8'
) as fd:
db = json.load(fd)
load()
......@@ -50,7 +50,7 @@ def response(resp):
if url.startswith('http://'):
url = 'https' + url[4:]
content = u'{} - {} - {}'.format(
content = '{} - {} - {}'.format(
result['artist']['name'],
result['album']['title'],
result['title'])
......
......@@ -15,10 +15,10 @@ from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
......@@ -37,7 +37,7 @@ def request(query, params):
params['url'] = url.format(from_lang=from_lang[2],
to_lang=to_lang[2],
query=query.decode('utf-8'))
query=query)
return params
......
......@@ -10,15 +10,11 @@
@parse url, title, content, magnetlink
"""
from sys import version_info
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from searx.url_utils import urljoin
if version_info[0] == 3:
unicode = str
categories = ['videos', 'music', 'files']
paging = True
......
......@@ -16,7 +16,6 @@
from lxml.html import fromstring
from json import loads
from searx.engines.xpath import extract_text
from searx.poolrequests import get
from searx.url_utils import urlencode
from searx.utils import match_language
......
......@@ -12,14 +12,13 @@
@todo avoid extra request
"""
import requests
from json import loads
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import (
_fetch_supported_languages, supported_languages_url,
get_region_code, language_aliases
)
from searx.poolrequests import get
from searx.url_utils import urlencode
# engine dependent config
......@@ -36,8 +35,7 @@ site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
# run query in site to get vqd number needed for requesting images
# TODO: find a way to get this number without an extra request (is it a hash of the query?)
def get_vqd(query, headers):
query_url = site_url.format(query=urlencode({'q': query}))
res = get(query_url, headers=headers)
res = requests.get(site_url.format(query=urlencode({'q': query})), headers=headers)
content = res.text
if content.find('vqd=\'') == -1:
raise Exception('Request failed')
......
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
......
......@@ -72,6 +72,7 @@ def parse_album(hit):
result.update({'content': 'Released: {}'.format(year)})
return result
parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album}
......
......@@ -90,7 +90,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
query += b' (' + (main_langs[language]).encode('utf-8') + b')'
query += ' (' + (main_langs[language]) + ')'
# prepare the request parameters
query = urlencode({'search': query})
......
......@@ -9,7 +9,7 @@
# @parse url, title, content, suggestion
import re
from flask_babel import gettext
from gettext import gettext
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx import logger
......@@ -89,9 +89,8 @@ url_map = 'https://www.openstreetmap.org/'\
# search-url
search_path = '/search'
search_url = ('https://{hostname}' +
search_path +
'?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')
search_url = ('https://{hostname}' + search_path + '?{query}'
'&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')
time_range_search = "&tbs=qdr:{range}"
time_range_dict = {'day': 'd',
......@@ -225,7 +224,7 @@ def response(resp):
instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
if instant_answer:
results.append({'answer': u' '.join(instant_answer)})
results.append({'answer': ' '.join(instant_answer)})
try:
results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
.split()[1].replace(',', ''))
......
......@@ -76,11 +76,11 @@ def response(resp):
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
thumbnails_data = re.findall(r's=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
tmp = re.findall(r'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
......
......@@ -16,11 +16,7 @@ from lxml import html
from dateutil import parser
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser