Commit 2cc4bf3e authored by Nicolas Gelot

Move to Python3

Move the spot project to Python 3. Python 2 support is removed,
following https://github.com/asciimoo/searx/issues/1471.

The update was mainly done with the `2to3` tool, along with many manual
changes for Unicode handling (see https://docs.python.org/3/howto/unicode.html).
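A minimal sketch of the recurring pattern in this diff (assuming the usual
`2to3 -w` workflow; the sample values below are illustrative, not part of
the patch): in Python 3, `str` is already text, so the `unicode = str`
shims and `.decode('utf-8')` calls disappear, and bytes are produced
explicitly only where an API requires them, such as hashing.

```python
import hashlib

# Python 2 style: query parts were byte strings and had to be decoded
# before use as text keys, e.g. query_parts[0].decode('utf-8').
# Python 3 style: str is text, so the decode step is dropped.
query_parts = [part for part in 'random sha256'.split() if part]

# Hashing still needs bytes, so text is encoded explicitly,
# mirroring the random_sha256() change below.
digest = hashlib.sha256(''.join(query_parts).encode()).hexdigest()
print(query_parts[0], digest)
```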

Close: #7
parent dc3f8b7b
-FROM alpine:3.8
+FROM python:3.7-alpine
LABEL maintainer="searx <https://github.com/asciimoo/searx>"
LABEL description="A privacy-respecting, hackable metasearch engine."
@@ -12,9 +12,6 @@ COPY requirements.txt ./requirements.txt
RUN apk -U add \
build-base \
-python \
-python-dev \
-py-pip \
libxml2 \
libxml2-dev \
libxslt \
@@ -27,7 +24,6 @@ RUN apk -U add \
&& pip install --no-cache -r requirements.txt \
&& apk del \
build-base \
-python-dev \
libffi-dev \
openssl-dev \
libxslt-dev \
......
@@ -5,4 +5,4 @@ RUN dnf install -y\
python2-pip\
npm\
&& dnf groupinstall -y "Development Tools" \
-&& pip install pytest ipdb ipython \
+&& pip3 install pytest ipdb ipython
@@ -16,14 +16,14 @@ ACTION="$1"
#
update_packages() {
-pip install --upgrade pip
-pip install --upgrade setuptools
-pip install -r "$BASE_DIR/requirements.txt"
+pip3 install --upgrade pip
+pip3 install --upgrade setuptools
+pip3 install -r "$BASE_DIR/requirements.txt"
}
update_dev_packages() {
update_packages
-pip install -r "$BASE_DIR/requirements-dev.txt"
+pip3 install -r "$BASE_DIR/requirements-dev.txt"
}
install_geckodriver() {
@@ -36,7 +36,7 @@ install_geckodriver() {
return
fi
GECKODRIVER_VERSION="v0.19.1"
PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
case "$PLATFORM" in
"linux 32bit" | "linux2 32bit") ARCH="linux32";;
"linux 64bit" | "linux2 64bit") ARCH="linux64";;
@@ -80,19 +80,19 @@ pep8_check() {
unit_tests() {
echo '[!] Running unit tests'
-python -m nose2 -s "$BASE_DIR/tests/unit"
+python3 -m nose2 -s "$BASE_DIR/tests/unit"
}
py_test_coverage() {
echo '[!] Running python test coverage'
PYTHONPATH="`pwd`" python -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
PYTHONPATH="`pwd`" python3 -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
&& coverage report \
&& coverage html
}
robot_tests() {
echo '[!] Running robot tests'
PYTHONPATH="`pwd`" python "$SEARX_DIR/testing.py" robot
PYTHONPATH="`pwd`" python3 "$SEARX_DIR/testing.py" robot
}
tests() {
......
from os import listdir
from os.path import realpath, dirname, join, isdir
-from sys import version_info
from searx.utils import load_module
from collections import defaultdict
-if version_info[0] == 3:
-unicode = str
answerers_dir = dirname(realpath(__file__))
@@ -34,12 +30,12 @@ def get_answerers_by_keywords(answerers):
def ask(query):
results = []
-query_parts = list(filter(None, query.query.split()))
+query_parts = list([_f for _f in query.query.split() if _f])
-if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+if query_parts[0] not in answerers_by_keywords:
return results
-for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+for answerer in answerers_by_keywords[query_parts[0]]:
result = answerer(query)
if result:
results.append(result)
......
@@ -11,11 +11,7 @@ keywords = ('random',)
random_int_max = 2**31
-if sys.version_info[0] == 2:
-random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
-unicode = str
-random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_characters():
@@ -24,32 +20,32 @@ def random_characters():
def random_string():
-return u''.join(random_characters())
+return ''.join(random_characters())
def random_float():
-return unicode(random.random())
+return str(random.random())
def random_int():
-return unicode(random.randint(-random_int_max, random_int_max))
+return str(random.randint(-random_int_max, random_int_max))
def random_sha256():
m = hashlib.sha256()
-m.update(b''.join(random_characters()))
-return unicode(m.hexdigest())
+m.update(''.join(random_characters()).encode())
+return m.hexdigest()
def random_uuid():
-return unicode(uuid.uuid4())
+return str(uuid.uuid4())
-random_types = {b'string': random_string,
-b'int': random_int,
-b'float': random_float,
-b'sha256': random_sha256,
-b'uuid': random_uuid}
+random_types = {'string': random_string,
+'int': random_int,
+'float': random_float,
+'sha256': random_sha256,
+'uuid': random_uuid}
# required answerer function
@@ -70,4 +66,4 @@ def answer(query):
def self_info():
return {'name': gettext('Random value generator'),
'description': gettext('Generate different random values'),
-'examples': [u'random {}'.format(x) for x in random_types]}
+'examples': ['random {}'.format(x) for x in random_types]}
-from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
-if version_info[0] == 3:
-unicode = str
keywords = ('min',
'max',
'avg',
@@ -30,21 +26,21 @@ def answer(query):
func = parts[0]
answer = None
-if func == b'min':
+if func == 'min':
answer = min(args)
-elif func == b'max':
+elif func == 'max':
answer = max(args)
-elif func == b'avg':
+elif func == 'avg':
answer = sum(args) / len(args)
-elif func == b'sum':
+elif func == 'sum':
answer = sum(args)
-elif func == b'prod':
+elif func == 'prod':
answer = reduce(mul, args, 1)
if answer is None:
return []
-return [{'answer': unicode(answer)}]
+return [{'answer': answer}]
# required answerer function
......
@@ -81,22 +81,22 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
-lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
-results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else:
-results.append(u':{lang_id}'.format(lang_id=lang_id))
+results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
-results.append(u':{lang_name}'.format(lang_name=lang_name))
+results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country
if country.startswith(engine_query.replace('_', ' ')):
-results.append(u':{country}'.format(country=country.replace(' ', '_')))
+results.append(':{country}'.format(country=country.replace(' ', '_')))
# remove duplicates
result_set = set(results)
......
@@ -63,7 +63,7 @@ def response(resp):
except:
pass
# I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-content = u'Category: "{category}".'
+content = 'Category: "{category}".'
content = content.format(category=category)
results.append({'url': href,
......
@@ -105,7 +105,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
-query += b' (' + main_langs[language] + b')'
+query += ' (' + main_langs[language] + ')'
# prepare the request parameters
query = urlencode({'search': query})
......
@@ -36,7 +36,7 @@ def request(query, params):
lang = match_language(params['language'], supported_languages, language_aliases)
-query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
search_path = search_string.format(
query=urlencode({'q': query}),
......
@@ -82,7 +82,7 @@ def request(query, params):
def response(resp):
results = []
-rss = etree.fromstring(resp.content)
+rss = etree.fromstring(resp.content.encode())
ns = rss.nsmap
......
@@ -7,20 +7,18 @@ import unicodedata
from io import open
from datetime import datetime
-if sys.version_info[0] == 3:
-unicode = str
categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100
-parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
+parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
-name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
......
@@ -50,7 +50,7 @@ def response(resp):
if url.startswith('http://'):
url = 'https' + url[4:]
-content = u'{} - {} - {}'.format(
+content = '{} - {} - {}'.format(
result['artist']['name'],
result['album']['title'],
result['title'])
......
@@ -15,10 +15,10 @@ from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general']
-url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
-parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
@@ -37,7 +37,7 @@ def request(query, params):
params['url'] = url.format(from_lang=from_lang[2],
to_lang=to_lang[2],
-query=query.decode('utf-8'))
+query=query)
return params
......
@@ -10,15 +10,11 @@
@parse url, title, content, magnetlink
"""
-from sys import version_info
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from searx.url_utils import urljoin
-if version_info[0] == 3:
-unicode = str
categories = ['videos', 'music', 'files']
paging = True
......
from searx.url_utils import urlencode
-try:
-from HTMLParser import HTMLParser
-except:
-from html.parser import HTMLParser
+from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
......
@@ -90,7 +90,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
-query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+query += ' (' + (main_langs[language]) + ')'
# prepare the request parameters
query = urlencode({'search': query})
......
@@ -221,7 +221,7 @@ def response(resp):
instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
if instant_answer:
-results.append({'answer': u' '.join(instant_answer)})
+results.append({'answer': ' '.join(instant_answer)})
try:
results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
.split()[1].replace(',', ''))
......
@@ -70,7 +70,7 @@ def response(resp):
# parse results
for img in dom.xpath('//a'):
r = {
-'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
+'title': ' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
'content': '',
'template': 'images.html',
}
......
@@ -16,11 +16,7 @@ from lxml import html
from dateutil import parser
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
-try:
-from HTMLParser import HTMLParser
-except:
-from html.parser import HTMLParser
+from html.parser import HTMLParser
# engine dependent config
categories = ['videos']
......