diff --git a/Dockerfile b/Dockerfile
index b1b9b18cdd8a626c02cc91fe96c78c44139ec731..6c0f11f862d1da487082e28fbae683e6a5062308 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM alpine:3.8
+FROM python:3.7-alpine
 
 LABEL maintainer="searx <https://github.com/asciimoo/searx>"
 LABEL description="A privacy-respecting, hackable metasearch engine."
@@ -12,9 +12,6 @@ COPY requirements.txt ./requirements.txt
 
 RUN apk -U add \
     build-base \
-    python \
-    python-dev \
-    py-pip \
     libxml2 \
     libxml2-dev \
     libxslt \
@@ -27,7 +24,6 @@ RUN apk -U add \
 && pip install --no-cache -r requirements.txt \
 && apk del \
     build-base \
-    python-dev \
    libffi-dev \
    openssl-dev \
    libxslt-dev \
diff --git a/Dockerfile.env b/Dockerfile.env
index 977e1d79e208cd745c53e9e1b2e10b29adcb3578..6d654297659fa8a7b7dc78d041cf7f84c235e9e3 100644
--- a/Dockerfile.env
+++ b/Dockerfile.env
@@ -5,4 +5,4 @@ RUN dnf install -y\
-    python2-pip\
+    python3-pip\
     npm\
 && dnf groupinstall -y "Development Tools" \
-&& pip install pytest ipdb ipython \
+&& pip3 install pytest ipdb ipython
diff --git a/manage.sh b/manage.sh
index 3d60f13f367c0a5cc835f6c59b997724ff091e18..fbbcc5cb70cfe213a66b5ca82e093856dd9ba714 100755
--- a/manage.sh
+++ b/manage.sh
@@ -16,14 +16,14 @@ ACTION="$1"
 
 #
 update_packages() {
-    pip install --upgrade pip
-    pip install --upgrade setuptools
-    pip install -r "$BASE_DIR/requirements.txt"
+    pip3 install --upgrade pip
+    pip3 install --upgrade setuptools
+    pip3 install -r "$BASE_DIR/requirements.txt"
 }
 
 update_dev_packages() {
     update_packages
-    pip install -r "$BASE_DIR/requirements-dev.txt"
+    pip3 install -r "$BASE_DIR/requirements-dev.txt"
 }
 
 install_geckodriver() {
@@ -36,7 +36,7 @@ install_geckodriver() {
         return
     fi
     GECKODRIVER_VERSION="v0.19.1"
-    PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
+    PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
     case "$PLATFORM" in
         "linux 32bit" | "linux2 32bit") ARCH="linux32";;
         "linux 64bit" | "linux2 64bit") ARCH="linux64";;
@@ -80,19 +80,19 @@ pep8_check() {
 
 unit_tests() {
     echo '[!] Running unit tests'
-    python -m nose2 -s "$BASE_DIR/tests/unit"
+    python3 -m nose2 -s "$BASE_DIR/tests/unit"
 }
 
 py_test_coverage() {
     echo '[!] Running python test coverage'
-    PYTHONPATH="`pwd`" python -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
+    PYTHONPATH="`pwd`" python3 -m nose2 -C --log-capture --with-coverage --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" \
     && coverage report \
     && coverage html
 }
 
 robot_tests() {
     echo '[!] Running robot tests'
-    PYTHONPATH="`pwd`" python "$SEARX_DIR/testing.py" robot
+    PYTHONPATH="`pwd`" python3 "$SEARX_DIR/testing.py" robot
 }
 
 tests() {
diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py
index 444316f11dda8473611099a739ff62d198b699f9..447383acc4612768805d21b705a9ab7271e39093 100644
--- a/searx/answerers/__init__.py
+++ b/searx/answerers/__init__.py
@@ -1,12 +1,8 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
-from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
-if version_info[0] == 3:
-    unicode = str
-
 answerers_dir = dirname(realpath(__file__))
 
@@ -34,12 +30,12 @@ def get_answerers_by_keywords(answerers):
 
 def ask(query):
     results = []
-    query_parts = list(filter(None, query.query.split()))
+    query_parts = [part for part in query.query.split() if part]
 
-    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+    if query_parts[0] not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+    for answerer in answerers_by_keywords[query_parts[0]]:
         result = answerer(query)
         if result:
             results.append(result)
diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py
index b6e8422adb5eaf68522ab47bdc1d61ec45df76e6..7bfd5fa36df81dac3a2e2e95f2ba658f30a2728b 100644
--- a/searx/answerers/random/answerer.py
+++ b/searx/answerers/random/answerer.py
@@ -11,11 +11,7 @@ keywords = ('random',)
 
 random_int_max = 2**31
 
-if sys.version_info[0] == 2:
-    random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
-    unicode = str
-    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_characters():
@@ -24,32 +20,32 @@ def random_characters():
 
 
 def random_string():
-    return u''.join(random_characters())
+    return ''.join(random_characters())
 
 
 def random_float():
-    return unicode(random.random())
+    return str(random.random())
 
 
 def random_int():
-    return unicode(random.randint(-random_int_max, random_int_max))
+    return str(random.randint(-random_int_max, random_int_max))
 
 
 def random_sha256():
     m = hashlib.sha256()
-    m.update(b''.join(random_characters()))
-    return unicode(m.hexdigest())
+    m.update(''.join(random_characters()).encode())
+    return m.hexdigest()
 
 
 def random_uuid():
-    return unicode(uuid.uuid4())
+    return str(uuid.uuid4())
 
 
-random_types = {b'string': random_string,
-                b'int': random_int,
-                b'float': random_float,
-                b'sha256': random_sha256,
-                b'uuid': random_uuid}
+random_types = {'string': random_string,
+                'int': random_int,
+                'float': random_float,
+                'sha256': random_sha256,
+                'uuid': random_uuid}
 
 
 # required answerer function
@@ -70,4 +66,4 @@ def answer(query):
 def self_info():
     return {'name': gettext('Random value generator'),
             'description': gettext('Generate different random values'),
-            'examples': [u'random {}'.format(x) for x in random_types]}
+            'examples': ['random {}'.format(x) for x in random_types]}
diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py
index 73dd25cfda36b150e8bb4abf94d8c1040fc35f59..d03a26af62d957f1a261bed64891148dbb8d3f47 100644
--- a/searx/answerers/statistics/answerer.py
+++ b/searx/answerers/statistics/answerer.py
@@ -1,12 +1,8 @@
-from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
-if version_info[0] == 3:
-    unicode = str
-
 keywords = ('min',
             'max',
             'avg',
@@ -30,21 +26,21 @@ def answer(query):
     func = parts[0]
     answer = None
 
-    if func == b'min':
+    if func == 'min':
         answer = min(args)
-    elif func == b'max':
+    elif func == 'max':
         answer = max(args)
-    elif func == b'avg':
+    elif func == 'avg':
         answer = sum(args) / len(args)
-    elif func == b'sum':
+    elif func == 'sum':
         answer = sum(args)
-    elif func == b'prod':
+    elif func == 'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:
         return []
 
-    return [{'answer': unicode(answer)}]
+    return [{'answer': str(answer)}]
 
 
 # required answerer function
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index f8a45b3ecf86eda8166e4a1d81095af3454b322b..31cb209c2711ca70952f7dd0dd77795e52a584d8 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -81,22 +81,22 @@ def searx_bang(full_query):
         engine_query = full_query.getSearchQuery()[1:]
 
         for lc in language_codes:
-            lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+            lang_id, lang_name, country, english_name = map(str.lower, lc)
 
             # check if query starts with language-id
             if lang_id.startswith(engine_query):
                 if len(engine_query) <= 2:
-                    results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+                    results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
                 else:
-                    results.append(u':{lang_id}'.format(lang_id=lang_id))
+                    results.append(':{lang_id}'.format(lang_id=lang_id))
 
             # check if query starts with language name
             if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
-                results.append(u':{lang_name}'.format(lang_name=lang_name))
+                results.append(':{lang_name}'.format(lang_name=lang_name))
 
             # check if query starts with country
             if country.startswith(engine_query.replace('_', ' ')):
-                results.append(u':{country}'.format(country=country.replace(' ', '_')))
+                results.append(':{country}'.format(country=country.replace(' ', '_')))
 
     # remove duplicates
     result_set = set(results)
diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py
index cca28f0db6a04b31180144f69a6e7967baf4c670..c82379c2735750f4654d785f9d942ec17c313f41 100644
--- a/searx/engines/acgsou.py
+++ b/searx/engines/acgsou.py
@@ -63,7 +63,7 @@ def response(resp):
         except:
             pass
         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-        content = u'Category: "{category}".'
+        content = 'Category: "{category}".'
content = content.format(category=category) results.append({'url': href, diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index fc08112af04276618990732a72ef1b30a5daa0f1..bb61fe25b9c3341523d2c1a6e5337ac82ed1512b 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -105,7 +105,7 @@ def request(query, params): # if our language is hosted on the main site, we need to add its name # to the query in order to narrow the results to that language if language in main_langs: - query += b' (' + main_langs[language] + b')' + query += ' (' + main_langs[language] + ')' # prepare the request parameters query = urlencode({'search': query}) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 2da40619d3fd927b71987763bfe09cfc947eee14..23da029a6880c4d757e03baeb3616acb5881ccdb 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -36,7 +36,7 @@ def request(query, params): lang = match_language(params['language'], supported_languages, language_aliases) - query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8') + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) search_path = search_string.format( query=urlencode({'q': query}), diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 39048a1fcfb16885430b3311ac70cd7e3249b8a0..1a71aed6d800975bcbfa445714f33044072cd5a1 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -82,7 +82,7 @@ def request(query, params): def response(resp): results = [] - rss = etree.fromstring(resp.content) + rss = etree.fromstring(resp.content.encode()) ns = rss.nsmap diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 8eab8f6736227879f0650fb406911f51a99252e6..9424d7d5ece8ad60b6f262baf6b0485ce521a00a 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -7,20 +7,18 @@ import unicodedata from io import open from datetime import datetime -if sys.version_info[0] == 3: - unicode = str categories = [] url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' weight = 100 -parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) +parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) 
([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) db = 1 def normalize_name(name): - name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') + name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index af63478fb74b2a46fd7d33e253c5537d198e8476..d176684c66dfdd0f71a3b5952453a751a4d6447e 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -50,7 +50,7 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = u'{} - {} - {}'.format( + content = '{} - {} - {}'.format( result['artist']['name'], result['album']['title'], result['title']) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 7cc44df73d8c754cc8c593fbc0078fbc6885f74c..deba3b6e514956f3e9508050c2f7cfca50b7ff7e 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -15,10 +15,10 @@ from searx.utils import is_valid_lang from searx.url_utils import urljoin categories = ['general'] -url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 -parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) +parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' @@ -37,7 +37,7 @@ def request(query, params): params['url'] = url.format(from_lang=from_lang[2], to_lang=to_lang[2], - query=query.decode('utf-8')) + query=query) return params diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index ff2f9459306ff0dcba3c3e1ea4e1631dc6e77890..3bbf902c28dd6fb2e61c9ab6f67496292674bdaf 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -10,15 +10,11 @@ @parse url, title, content, magnetlink """ -from sys import version_info from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size from searx.url_utils import urljoin -if version_info[0] == 3: - unicode = str - categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py index ed57a6bf3722d5390806ea77f0da0babf5299401..48c34fa897f8c27545539af30d3fdf03a4bbed25 100644 --- a/searx/engines/filecrop.py +++ b/searx/engines/filecrop.py @@ -1,9 +1,5 @@ from searx.url_utils import urlencode - -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser +from html.parser import HTMLParser url = 'http://www.filecrop.com/' search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index a7a966cc921b878fb4b938cffe92c705a3d8fee3..8075d580ad8404f1aeba70db1bad76788e47cabf 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -90,7 +90,7 @@ def request(query, params): # if our language is hosted on the main site, we need to add its name # to the query in order to narrow the results to that language if language in main_langs: - query += b' (' + (main_langs[language]).encode('utf-8') + b')' + query += ' (' + (main_langs[language]) + ')' # prepare the request parameters query = urlencode({'search': query}) diff --git a/searx/engines/google.py b/searx/engines/google.py index 62e7d1170f0d0e9b174040aecf1e0baa6c598d2b..180e8fc09df6c50379e00aed2bb3296ad8d1b867 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -221,7 +221,7 @@ def 
response(resp):
     instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
     if instant_answer:
-        results.append({'answer': u' '.join(instant_answer)})
+        results.append({'answer': ' '.join(instant_answer)})
     try:
         results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
                           .split()[1].replace(',', ''))
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 504831a1045e6ed1fb3b5f5356310a8a5530da1e..6a32500fb228d03ecadba8cfcb70eae0d0657f4b 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -70,7 +70,7 @@ def response(resp):
     # parse results
     for img in dom.xpath('//a'):
         r = {
-            'title': u' '.join(img.xpath('.//div[@class="rg_ilmbg"]//text()')),
+            'title': ' '.join(img.xpath('.//div[@class="rg_ilmbg"]//text()')),
             'content': '',
             'template': 'images.html',
         }
diff --git a/searx/engines/ina.py b/searx/engines/ina.py
index 37a05f099ff38cc83af0745db02aa5355989afcd..5d59c9f1daee558d5a8e008e62cdef4a4e9de380 100644
--- a/searx/engines/ina.py
+++ b/searx/engines/ina.py
@@ -16,11 +16,7 @@ from lxml import html
 from dateutil import parser
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
+from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 785b0c49026272e86d45ad297f32b139e88d4124..550947ab6818587e15b77259f06c348c3f2baeca 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
 from searx.url_utils import urlencode
 from searx.utils import to_string
 
-if version_info[0] == 3:
-    unicode = str
 
 search_url = None
 url_query = None
@@ -37,7 +34,5 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index c7b05ffcdf1e7e6c407776988dca06881e94f99c..e40e34d5f89fd2238f4dec39e00e30d4b776364a 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -76,7 +76,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_'))
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 733ba62034c15e102e46e40d684119e3a881fa22..4a13a25dd5af3a611b7bf553092632a2ecdc15c5 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -39,7 +39,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
 
-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
@@ -51,7 +51,7 @@ def response(resp):
 
         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
 
         address_raw = r.get('address')
         address = {}
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index d59755e04c5d7830f715ce87d2d1fead74197eae..296a94d07722c93a1ff12dd1799a15982eb90569 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -17,11 +17,7 @@ from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
 from searx.url_utils import quote_plus, urlencode
-
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
+from io import StringIO
 
 # engine dependent config
 categories = ['music']
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index aed756be3a0e280d1e8e814f59be435eb4006b91..ff536e30f5a7bf51be06215f5578976ce6641a0c 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -45,7 +45,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index ff4df24b724beb6d77474096bbfabe0e13ff5c4c..f2fafec3a62a82d2e5ba4a236b29700a3a8bd98a 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -28,10 +28,10 @@ search_string = '?{query}&page={page}'
 supported_languages_url = base_url
 
 # regex
-regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(b'^initialData:\s*')
-regex_json_remove_end = re.compile(b',\s*environment$')
-regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile('^initialData:\s*')
+regex_json_remove_end = re.compile(',\s*environment$')
+regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
@@ -63,16 +63,16 @@ def response(resp):
     if not json_regex:
         return []
 
-    json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
-    json = loads(json_raw.decode('utf-8'))
+    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
+    json = loads(json_raw)
 
     # parse results
     for result in json['Results'].get('items', []):
-        result_title = result['Title'].replace(u'\uE000', '').replace(u'\uE001', '')
+        result_title = result['Title'].replace('\uE000', '').replace('\uE001', '')
 
         # parse image results
         if result.get('ContentType', '').startswith('image'):
-            img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
+            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
 
             # append result
             results.append({'url': result['SourceUrl'],
@@ -83,8 +83,8 @@ def response(resp):
 
         # parse general results
         else:
-            result_url = result['Url'].replace(u'\uE000', '').replace(u'\uE001', '')
-            result_content = result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')
+            result_url = result['Url'].replace('\uE000', '').replace('\uE001', '')
+            result_content = result['Description'].replace('\uE000', '').replace('\uE001', '')
 
             # append result
             results.append({'url': result_url,
@@ -94,7 +94,7 @@ def response(resp):
     # parse images
     for result in json.get('Images', []):
         # decode image url
-        img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
+        img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
 
         # append result
         results.append({'url': result['SourceUrl'],
diff --git
a/searx/engines/translated.py b/searx/engines/translated.py index 5c7b170332c963d2c748af8230525d7348d1ce37..295089cb309d771c36c6d200eb5db0446f8f69f4 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -9,23 +9,20 @@ @parse url, title, content """ import re -from sys import version_info from searx.utils import is_valid_lang -if version_info[0] == 3: - unicode = str categories = ['general'] -url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' -web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +url = 'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' +web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 -parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) +parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) api_key = '' def request(query, params): - m = parser_re.match(unicode(query, 'utf8')) + m = parser_re.match(query) if not m: return params diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index c315b30da575c4f88a106e7910c69f4231eb2ab8..96d46f182dd3aa037afc80da7b2c29ebbb7984a3 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -434,16 +434,16 @@ def get_geolink(result): latitude, longitude = coordinates.split(',') # convert to decimal - lat = int(latitude[:latitude.find(u'°')]) + lat = int(latitude[:latitude.find('°')]) if latitude.find('\'') >= 0: - lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0 + lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0 if latitude.find('"') >= 0: lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0 if latitude.find('S') >= 0: lat *= -1 - lon = int(longitude[:longitude.find(u'°')]) + lon = int(longitude[:longitude.find('°')]) if longitude.find('\'') >= 0: - lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0 + lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0 if longitude.find('"') >= 0: lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0 if longitude.find('W') >= 0: diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 6cd17e378909d08ecb8ec2060d4d7634fbf1eea7..987b8f9b118c45a2e857f4aabf38e7f834b246b1 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -16,8 +16,8 @@ from searx.url_utils import quote, urlencode from searx.utils import match_language # search-url -base_url = u'https://{language}.wikipedia.org/' -search_url = base_url + u'w/api.php?'\ +base_url = 'https://{language}.wikipedia.org/' +search_url = base_url + 'w/api.php?'\ 'action=query'\ '&format=json'\ '&{query}'\ @@ -37,7 +37,7 @@ def url_lang(lang): # do search-request def request(query, params): if query.islower(): - query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8') + query = '{0}|{1}'.format(query, query.title()) params['url'] = search_url.format(query=urlencode({'titles': query}), language=url_lang(params['language'])) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 595c6b7de3245b0236a6decddb81e8454c6c77d6..383d8c3b36902796b3e28894b580da3d915bbf99 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -45,15 +45,15 @@ def request(query, params): # replace private user area characters to make text legible def 
replace_pua_chars(text):
-    pua_chars = {u'\uf522': u'\u2192',  # rigth arrow
-                 u'\uf7b1': u'\u2115',  # set of natural numbers
-                 u'\uf7b4': u'\u211a',  # set of rational numbers
-                 u'\uf7b5': u'\u211d',  # set of real numbers
-                 u'\uf7bd': u'\u2124',  # set of integer numbers
-                 u'\uf74c': 'd',        # differential
-                 u'\uf74d': u'\u212f',  # euler's number
-                 u'\uf74e': 'i',        # imaginary number
-                 u'\uf7d9': '='}        # equals sign
+    pua_chars = {'\uf522': '→',  # right arrow
+                 '\uf7b1': 'ℕ',  # set of natural numbers
+                 '\uf7b4': 'ℚ',  # set of rational numbers
+                 '\uf7b5': 'ℝ',  # set of real numbers
+                 '\uf7bd': 'ℤ',  # set of integer numbers
+                 '\uf74c': 'd',  # differential
+                 '\uf74d': 'ℯ',  # euler's number
+                 '\uf74e': 'i',  # imaginary number
+                 '\uf7d9': '='}  # equals sign
 
     for k, v in pua_chars.items():
         text = text.replace(k, v)
@@ -65,7 +65,7 @@ def replace_pua_chars(text):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.text)
+    search_results = etree.XML(resp.text.encode())
 
     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 50f98d935d7460727e4987d13967d28fea7d2c60..8da0547c6cd769e7e44c1dd407ee75e0c5cac177 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -53,7 +53,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+        url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -79,7 +79,7 @@ def normalize_url(url):
     p = parsed_url.path
     mark = p.find('/**')
     if mark != -1:
-        return unquote(p[mark + 3:]).decode('utf-8')
+        return unquote(p[mark + 3:])
 
     return url
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index b61384d06bf040a66a14b2cfeed5bef7f87f1b53..a7789fcaeb79f256ddc3c80fd62013c602db377a 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -55,7 +55,7 @@ def request(query, params):
 
 def sanitize_url(url):
     if ".yahoo.com/" in url:
-        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+        return re.sub("\\;\\_ylt\\=.+$", "", url)
     else:
         return url
diff --git a/searx/languages.py b/searx/languages.py
index cab1245412f6823e7962edb41ea873c428dc316a..d778c29dc86d570308073c326f4aace2ef3f659e 100644
--- a/searx/languages.py
+++ b/searx/languages.py
@@ -3,65 +3,65 @@
 # this file is generated automatically by utils/update_search_languages.py
 
 language_codes = (
-    (u"ar-SA", u"العربية", u"", u"Arabic"),
-    (u"bg-BG", u"Български", u"", u"Bulgarian"),
-    (u"ca-ES", u"Català", u"", u"Catalan"),
-    (u"cs-CZ", u"Čeština", u"", u"Czech"),
-    (u"da-DK", u"Dansk", u"", u"Danish"),
-    (u"de", u"Deutsch", u"", u"German"),
-    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
-    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
-    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
-    (u"el-GR", u"Ελληνικά", u"", u"Greek"),
-    (u"en", u"English", u"", u"English"),
-    (u"en-AU", u"English", u"Australia", u"English"),
-    (u"en-CA", u"English", u"Canada", u"English"),
-    (u"en-GB", u"English", u"United Kingdom", u"English"),
-    (u"en-IN", u"English", u"India", u"English"),
-    (u"en-MY", u"English", u"Malaysia", u"English"),
-    (u"en-US", u"English", u"United States", u"English"),
-    (u"es", u"Español", u"", u"Spanish"),
-    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
-    (u"es-ES", u"Español", u"España", u"Spanish"),
-    (u"es-MX", u"Español",
u"México", u"Spanish"), - (u"et-EE", u"Eesti", u"", u"Estonian"), - (u"fa-IR", u"فارسی", u"", u"Persian"), - (u"fi-FI", u"Suomi", u"", u"Finnish"), - (u"fr", u"Français", u"", u"French"), - (u"fr-BE", u"Français", u"Belgique", u"French"), - (u"fr-CA", u"Français", u"Canada", u"French"), - (u"fr-CH", u"Français", u"Suisse", u"French"), - (u"fr-FR", u"Français", u"France", u"French"), - (u"he-IL", u"עברית", u"", u"Hebrew"), - (u"hr-HR", u"Hrvatski", u"", u"Croatian"), - (u"hu-HU", u"Magyar", u"", u"Hungarian"), - (u"id-ID", u"Indonesia", u"", u"Indonesian"), - (u"is-IS", u"Íslenska", u"", u"Icelandic"), - (u"it-IT", u"Italiano", u"", u"Italian"), - (u"ja-JP", u"日本語", u"", u"Japanese"), - (u"ko-KR", u"한국어", u"", u"Korean"), - (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), - (u"lv-LV", u"Latviešu", u"", u"Latvian"), - (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"), - (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"), - (u"nl", u"Nederlands", u"", u"Dutch"), - (u"nl-BE", u"Nederlands", u"België", u"Dutch"), - (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), - (u"pl-PL", u"Polski", u"", u"Polish"), - (u"pt", u"Português", u"", u"Portuguese"), - (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), - (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), - (u"ro-RO", u"Română", u"", u"Romanian"), - (u"ru-RU", u"Русский", u"", u"Russian"), - (u"sk-SK", u"Slovenčina", u"", u"Slovak"), - (u"sl-SI", u"Slovenščina", u"", u"Slovenian"), - (u"sr-RS", u"Српски", u"", u"Serbian"), - (u"sv-SE", u"Svenska", u"", u"Swedish"), - (u"th-TH", u"ไทย", u"", u"Thai"), - (u"tr-TR", u"Türkçe", u"", u"Turkish"), - (u"uk-UA", u"Українська", u"", u"Ukrainian"), - (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), - (u"zh", u"中文", u"", u"Chinese"), - (u"zh-CN", u"中文", u"中国", u"Chinese"), - (u"zh-TW", u"中文", u"台灣", u"Chinese") + ("ar-SA", "العربية", "", "Arabic"), + ("bg-BG", "Български", "", "Bulgarian"), + ("ca-ES", "Català", "", "Catalan"), + ("cs-CZ", "Čeština", "", "Czech"), + ("da-DK", "Dansk", "", "Danish"), + ("de", "Deutsch", "", "German"), + ("de-AT", "Deutsch", "Österreich", "German"), + ("de-CH", "Deutsch", "Schweiz", "German"), + ("de-DE", "Deutsch", "Deutschland", "German"), + ("el-GR", "Ελληνικά", "", "Greek"), + ("en", "English", "", "English"), + ("en-AU", "English", "Australia", "English"), + ("en-CA", "English", "Canada", "English"), + ("en-GB", "English", "United Kingdom", "English"), + ("en-IN", "English", "India", "English"), + ("en-MY", "English", "Malaysia", "English"), + ("en-US", "English", "United States", "English"), + ("es", "Español", "", "Spanish"), + ("es-AR", "Español", "Argentina", "Spanish"), + ("es-ES", "Español", "España", "Spanish"), + ("es-MX", "Español", "México", "Spanish"), + ("et-EE", "Eesti", "", "Estonian"), + ("fa-IR", "فارسی", "", "Persian"), + ("fi-FI", "Suomi", "", "Finnish"), + ("fr", "Français", "", "French"), + ("fr-BE", "Français", "Belgique", "French"), + ("fr-CA", "Français", "Canada", "French"), + ("fr-CH", "Français", "Suisse", "French"), + ("fr-FR", "Français", "France", "French"), + ("he-IL", "עברית", "", "Hebrew"), + ("hr-HR", "Hrvatski", "", "Croatian"), + ("hu-HU", "Magyar", "", "Hungarian"), + ("id-ID", "Indonesia", "", "Indonesian"), + ("is-IS", "Íslenska", "", "Icelandic"), + ("it-IT", "Italiano", "", "Italian"), + ("ja-JP", "日本語", "", "Japanese"), + ("ko-KR", "한국어", "", "Korean"), + ("lt-LT", "Lietuvių", "", "Lithuanian"), + ("lv-LV", "Latviešu", "", "Latvian"), + ("ms-MY", "Bahasa Melayu", "", "Malay"), + ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"), + 
("nl", "Nederlands", "", "Dutch"), + ("nl-BE", "Nederlands", "België", "Dutch"), + ("nl-NL", "Nederlands", "Nederland", "Dutch"), + ("pl-PL", "Polski", "", "Polish"), + ("pt", "Português", "", "Portuguese"), + ("pt-BR", "Português", "Brasil", "Portuguese"), + ("pt-PT", "Português", "Portugal", "Portuguese"), + ("ro-RO", "Română", "", "Romanian"), + ("ru-RU", "Русский", "", "Russian"), + ("sk-SK", "Slovenčina", "", "Slovak"), + ("sl-SI", "Slovenščina", "", "Slovenian"), + ("sr-RS", "Српски", "", "Serbian"), + ("sv-SE", "Svenska", "", "Swedish"), + ("th-TH", "ไทย", "", "Thai"), + ("tr-TR", "Türkçe", "", "Turkish"), + ("uk-UA", "Українська", "", "Ukrainian"), + ("vi-VN", "Tiếng Việt", "", "Vietnamese"), + ("zh", "中文", "", "Chinese"), + ("zh-CN", "中文", "中国", "Chinese"), + ("zh-TW", "中文", "台灣", "Chinese") ) diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 4dbcbbd28c5fd4a30b4a4a031ddf72e25a2ace4d..408d1b681f89805349c26f271982ded535c142e3 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -14,11 +14,9 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, ''' -from sys import exit, version_info +from sys import exit from searx import logger -if version_info[0] == 3: - unicode = str logger = logger.getChild('plugins') @@ -31,8 +29,8 @@ from searx.plugins import (oa_doi_rewrite, tracker_url_remover, vim_hotkeys) -required_attrs = (('name', (str, unicode)), - ('description', (str, unicode)), +required_attrs = (('name', str), + ('description', str), ('default_on', bool)) optional_attrs = (('js_dependencies', tuple), diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 8236526871524a2ed2ef6111c9067a73418c94b1..343f417f6caa790ea021ff4c3d71b038df550a2f 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -25,8 +25,6 @@ from flask_babel import gettext from searx import searx_dir from searx.url_utils import urlparse -if sys.version_info[0] == 3: - unicode = str name = "HTTPS rewrite" description = gettext('Rewrite HTTP links to HTTPS if possible') diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 51fa4a17592891ba73de72e0b3d244d8dc3d590a..ad15efeedd749ef705ab5746cd5cc7ad08933272 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -22,7 +22,7 @@ default_on = True # Self User Agent regex -p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) +p = re.compile('.*user[ -]agent.*', re.IGNORECASE) # attach callback to the post search hook @@ -31,7 +31,7 @@ p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) def post_search(request, searchData): if searchData.pageno > 1: return True - if searchData.query == b'ip': + if searchData.query == 'ip': x_forwarded_for = request.headers.getlist("X-Forwarded-For") if x_forwarded_for: ip = x_forwarded_for[0] diff --git a/searx/preferences.py b/searx/preferences.py index ed2cc402abb2a212375bb09fb6c571a9c00f24f8..9c10bd11892a4c42a602192fa76a6468dd64ea9d 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -1,14 +1,10 @@ from base64 import urlsafe_b64encode, urlsafe_b64decode from zlib import compress, decompress -from sys import version from searx import settings, autocomplete from searx.languages import language_codes as languages from searx.url_utils import parse_qs, urlencode -if version[0] == '3': - unicode = str - COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years LANGUAGE_CODES = [l[0] for l in languages] @@ -287,11 +283,11 @@ class Preferences(object): 
         settings_kv['disabled_plugins'] = ','.join(self.plugins.disabled)
         settings_kv['enabled_plugins'] = ','.join(self.plugins.enabled)
 
-        return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
+        return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()
 
     def parse_encoded_data(self, input_data):
-        decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8')))
-        self.parse_dict({x: y[0] for x, y in parse_qs(unicode(decoded_data)).items()})
+        decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
+        self.parse_dict({x: y[0] for x, y in parse_qs(decoded_data.decode()).items()})
 
     def parse_dict(self, input_data):
         for user_setting_name, user_setting in input_data.items():
diff --git a/searx/query.py b/searx/query.py
index 5846478988780f3eab1e311b6f2f98438a4e26fc..fe44496c65427c84da014ba209a555ecf0f83023 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -24,8 +24,5 @@ from searx.engines import (
 import re
-import sys
 
-if sys.version_info[0] == 3:
-    unicode = str
 
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@@ -76,7 +74,7 @@ class RawTextQuery(object):
         # check if any language-code is equal with
         # declared language-codes
         for lc in language_codes:
-            lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+            lang_id, lang_name, country, english_name = map(str.lower, lc)
 
             # if correct language-code is found
             # set it as new search-language
@@ -157,7 +155,7 @@ class RawTextQuery(object):
     def getFullQuery(self):
         # get full querry including whitespaces
-        return u''.join(self.query_parts)
+        return ''.join(self.query_parts)
 
 
 class SearchQuery(object):
diff --git a/searx/results.py b/searx/results.py
index ab1c83e198ea89eb278ee236553cb4ff5b8e47c9..cde7be1212dc460f52190f85db54da7a9f34d4e7 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -1,13 +1,10 @@
 import re
-import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
 from searx.engines import engines
 from searx.url_utils import urlparse, unquote
 
-if sys.version_info[0] == 3:
-    basestring = str
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -15,7 +12,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
 # return the meaningful length of the content for a result
 def result_content_len(content):
-    if isinstance(content, basestring):
+    if isinstance(content, str):
         return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
     else:
         return 0
@@ -170,15 +167,11 @@
             self.paging = True
 
         for i, result in enumerate(results):
-            if 'url' in result and not isinstance(result['url'], basestring):
+            if 'url' in result and not isinstance(result['url'], str):
                 continue
-            try:
-                result['url'] = result['url'].decode('utf-8')
-            except:
-                pass
-            if 'title' in result and not isinstance(result['title'], basestring):
+            if 'title' in result and not isinstance(result['title'], str):
                 continue
-            if 'content' in result and not isinstance(result['content'], basestring):
+            if 'content' in result and not isinstance(result['content'], str):
                 continue
             position = i + 1
             self._merge_result(result, position)
@@ -316,7 +313,7 @@ class ResultContainer(object):
         resultnum_sum = sum(self._number_of_results)
         if not resultnum_sum or not self._number_of_results:
             return 0
-        return resultnum_sum / len(self._number_of_results)
+        return resultnum_sum // len(self._number_of_results)
 
     def add_unresponsive_engine(self,
engine_error): self.unresponsive_engines.add(engine_error) diff --git a/searx/search.py b/searx/search.py index 3975487715d4f47fb7ab2282d8cb944e422effd6..d6888daac9bf8699aa22ab74e8766b43f330e9a3 100644 --- a/searx/search.py +++ b/searx/search.py @@ -16,7 +16,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' import gc -import sys import threading from time import time from uuid import uuid4 @@ -36,13 +35,7 @@ from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE from searx.results import ResultContainer from searx.utils import gen_useragent -try: - from thread import start_new_thread -except: - from _thread import start_new_thread - -if sys.version_info[0] == 3: - unicode = str +from _thread import start_new_thread logger = logger.getChild('search') @@ -139,7 +132,7 @@ def search_one_request_safe(engine_name, query, request_params, result_container else: result_container.add_unresponsive_engine(( engine_name, - u'{0}: {1}'.format(gettext('unexpected crash'), e), + '{0}: {1}'.format(gettext('unexpected crash'), e), )) # others errors logger.exception('engine {0} : exception : {1}'.format(engine_name, e)) @@ -205,7 +198,7 @@ def get_search_query_from_webapp(preferences, form): raw_text_query.parse_query() # set query - query = raw_text_query.getSearchQuery().encode('utf-8') + query = raw_text_query.getSearchQuery() # get and check page number pageno_param = form.get('pageno', '1') diff --git a/searx/search_database.py b/searx/search_database.py index 5c106e42fa5d77990d9dc11fed52a934f879d5f3..c9f74e6bfea15850106f1b0cc465808cd5be163e 100644 --- a/searx/search_database.py +++ b/searx/search_database.py @@ -1,6 +1,6 @@ import json import threading -import urllib +import urllib.parse import redis @@ -90,11 +90,11 @@ def get_twenty_queries(x, host): def e(obj): - return urllib.quote_plus(obj) + return urllib.parse.quote_plus(obj) def d(coded): - return urllib.unquote_plus(coded) + return urllib.parse.unquote_plus(coded) def je(obj): diff --git a/searx/testing.py b/searx/testing.py index 0d17b2a08645f8a15e6a58f069c07f7c4532d119..279ae0b7a41bbd2b703d4a1715e3ea628a6e0ea6 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -16,7 +16,7 @@ from unittest2 import TestCase class SearxTestLayer: """Base layer for non-robot tests.""" - __name__ = u'SearxTestLayer' + __name__ = 'SearxTestLayer' def setUp(cls): pass diff --git a/searx/url_utils.py b/searx/url_utils.py index dcafc3ba817455756a9b9f49a9c65f19c159539a..f349a98d85a56cd35cd3c45425e41d5c6819a268 100644 --- a/searx/url_utils.py +++ b/searx/url_utils.py @@ -1,21 +1,15 @@ -from sys import version_info - -if version_info[0] == 2: - from urllib import quote, quote_plus, unquote, urlencode - from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult -else: - from urllib.parse import ( - parse_qs, - parse_qsl, - quote, - quote_plus, - unquote, - urlencode, - urljoin, - urlparse, - urlunparse, - ParseResult - ) +from urllib.parse import ( + parse_qs, + parse_qsl, + quote, + quote_plus, + unquote, + urlencode, + urljoin, + urlparse, + urlunparse, + ParseResult +) __export__ = (parse_qs, diff --git a/searx/utils.py b/searx/utils.py index dfa22c5fc0c1a5a54d5acc9a2b2c7215476d2909..c59ceca9e75915c2d86b0d8c16a0ea1b1e87167b 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -21,23 +21,9 @@ from searx.languages import language_codes from searx import settings from searx import logger -try: - from cStringIO import StringIO -except: - from io import StringIO - -try: - from HTMLParser import 
HTMLParser
-except:
-    from html.parser import HTMLParser
-
-if sys.version_info[0] == 3:
-    unichr = chr
-    unicode = str
-    IS_PY2 = False
-    basestring = str
-else:
-    IS_PY2 = True
+from io import StringIO
+from html.parser import HTMLParser
+
 
 logger = logger.getChild('utils')
 
@@ -67,19 +53,18 @@ def highlight_content(content, query):
     if content.find('<') != -1:
         return content
 
-    query = query.decode('utf-8')
     if content.lower().find(query.lower()) > -1:
-        query_regex = u'({0})'.format(re.escape(query))
+        query_regex = '({0})'.format(re.escape(query))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
             if len(chunk) == 1:
-                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
+                regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
             else:
-                regex_parts.append(u'{0}'.format(re.escape(chunk)))
-        query_regex = u'({0})'.format('|'.join(regex_parts))
+                regex_parts.append('{0}'.format(re.escape(chunk)))
+        query_regex = '({0})'.format('|'.join(regex_parts))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)
 
@@ -116,11 +101,11 @@ class HTMLTextExtractor(HTMLParser):
     def handle_charref(self, number):
         if not self.is_valid_tag():
             return
-        if number[0] in (u'x', u'X'):
+        if number[0] in ('x', 'X'):
             codepoint = int(number[1:], 16)
         else:
             codepoint = int(number)
-        self.result.append(unichr(codepoint))
+        self.result.append(chr(codepoint))
 
     def handle_entityref(self, name):
         if not self.is_valid_tag():
@@ -130,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(name)
 
     def get_text(self):
-        return u''.join(self.result).strip()
+        return ''.join(self.result).strip()
 
 
 def html_to_text(html):
@@ -155,22 +140,14 @@ class UnicodeWriter:
         self.encoder = getincrementalencoder(encoding)()
 
     def writerow(self, row):
-        if IS_PY2:
-            row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
         self.writer.writerow(row)
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
-        if IS_PY2:
-            data = data.decode("utf-8")
-        else:
-            data = data.strip('\x00')
+        data = data.strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        if IS_PY2:
-            self.stream.write(data)
-        else:
-            self.stream.write(data.decode("utf-8"))
+        self.stream.write(data)
         # empty queue
         self.queue.truncate(0)
@@ -245,7 +222,7 @@ def dict_subset(d, properties):
 
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
         chunk_len = int(max_length / 2 + 1)
-        return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
+        return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url
@@ -384,17 +361,15 @@ def load_module(filename, module_dir):
 
 
 def new_hmac(secret_key, url):
-    if sys.version_info[0] == 2:
-        return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
-    else:
-        return hmac.new(bytes(secret_key, 'utf-8'), url, hashlib.sha256).hexdigest()
+    # hmac.new() requires a bytes message, so callers must pass url as bytes
+    return hmac.new(bytes(secret_key, 'utf-8'), url, hashlib.sha256).hexdigest()
 
 
 def to_string(obj):
-    if isinstance(obj, basestring):
+    if isinstance(obj, str):
         return obj
     if isinstance(obj, Number):
-        return unicode(obj)
+        return str(obj)
     if hasattr(obj, '__str__'):
         return obj.__str__()
     if hasattr(obj, '__repr__'):
diff --git a/searx/webapp.py b/searx/webapp.py
index a7bee4d729ca760b24499639edb910ef9d502517..6836c8e382d32957fc4498f244af658e887cc977 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -46,7 +46,7 @@ except:
     from sys import exit
     exit(1)
 
-from cgi import escape
+from html import escape
 from datetime import datetime, timedelta
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
@@ -87,16 +87,7 @@ except ImportError:
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")
 
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
-
-if sys.version_info[0] == 3:
-    unicode = str
-    PY3 = True
-else:
-    PY3 = False
+from io import StringIO
 
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
@@ -282,11 +273,11 @@ def proxify(url):
     if not settings.get('result_proxy'):
         return url
 
-    url_params = dict(mortyurl=url.encode('utf-8'))
+    url_params = dict(mortyurl=url)
 
     if settings['result_proxy'].get('key'):
         url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'],
                                            url.encode('utf-8'),
                                            hashlib.sha256).hexdigest()
 
     return '{0}?{1}'.format(settings['result_proxy']['url'],
@@ -303,10 +294,10 @@ def image_proxify(url):
     if settings.get('result_proxy'):
         return proxify(url)
 
     h = new_hmac(settings['server']['secret_key'], url.encode('utf-8'))
 
     return '{0}?{1}'.format(url_for('image_proxy'),
-                            urlencode(dict(url=url.encode('utf-8'), h=h)))
+                            urlencode(dict(url=url, h=h)))
 
 
 def render(template_name, override_theme=None, **kwargs):
@@ -384,7 +375,7 @@ def render(template_name, override_theme=None, **kwargs):
 
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
 
-    kwargs['unicode'] = unicode
+    kwargs['unicode'] = str
 
     kwargs['preferences'] = request.preferences
 
@@ -443,7 +434,7 @@ def config_results(results, query):
     for result in results:
         if 'content' in result and result['content']:
             result['content'] = highlight_content(escape(result['content'][:1024]), query)
-        result['title'] = highlight_content(escape(result['title'] or u''), query)
+        result['title'] = highlight_content(escape(result['title'] or ''), query)
         result['pretty_url'] = prettify_url(result['url'])
 
         if 'pubdate' in result:
@@ -453,9 +444,9 @@ def config_results(results, query):
             minutes = int((timedifference.seconds / 60) % 60)
             hours
= int(timedifference.seconds / 60 / 60) if hours == 0: - result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) + result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) else: - result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format( + result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( hours=hours, minutes=minutes) # noqa else: result['publishedDate'] = format_date(publishedDate) @@ -525,7 +516,7 @@ def index(): return render( 'results.html', results=search_data.results, - q=search_data.query.decode('utf-8'), + q=search_data.query, selected_category=selected_category, pageno=search_data.pageno, time_range=search_data.time_range, @@ -563,10 +554,7 @@ def autocompleter(): disabled_engines = request.preferences.engines.get_disabled() # parse query - if PY3: - raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines) - else: - raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines) + raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines) raw_text_query.parse_query() # check if search query is set @@ -673,7 +661,7 @@ def preferences(): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode('utf-8') + url = request.args.get('url') if not url: return '', 400 @@ -850,7 +838,7 @@ def update_results(): def run(): logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address']) threading.Thread(target=update_results, name='results_updater').start() - print "engine server starting" + print("engine server starting") app.run( debug=searx_debug, use_debugger=searx_debug, @@ -858,7 +846,7 @@ def run(): host=settings['server']['bind_address'], threaded=True ) - print "wait for shutdown..." 
+ print("wait for shutdown...") running.set() diff --git a/tests/unit/engines/pubmed.py b/tests/unit/engines/pubmed.py index 370efe067843cf53d616c8835390ae1657808691..17f3a253f922c38041080af1857759d76be42d53 100644 --- a/tests/unit/engines/pubmed.py +++ b/tests/unit/engines/pubmed.py @@ -30,7 +30,7 @@ class TestPubmedEngine(SearxTestCase): """ - response = mock.Mock(text=xml_mock.encode('utf-8')) + response = mock.Mock(text=xml_mock) results = pubmed.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/tests/unit/engines/test_acgsou.py b/tests/unit/engines/test_acgsou.py index c01acf5deb071afa51908851a9c57171c07fd4df..fa920dcdad9a6202224025578f417a2f483c2920 100644 --- a/tests/unit/engines/test_acgsou.py +++ b/tests/unit/engines/test_acgsou.py @@ -20,7 +20,7 @@ class TestAcgsouEngine(SearxTestCase): resp = mock.Mock(text='') self.assertEqual(acgsou.response(resp), []) - html = u""" + html = """ @@ -73,6 +73,6 @@ class TestAcgsouEngine(SearxTestCase): r = results[0] self.assertEqual(r['url'], 'http://www.acgsou.com/show-torrentid.html') - self.assertEqual(r['content'], u'Category: "testcategory テスト".') - self.assertEqual(r['title'], u'torrentname テスト') + self.assertEqual(r['content'], 'Category: "testcategory テスト".') + self.assertEqual(r['title'], 'torrentname テスト') self.assertEqual(r['filesize'], 1048576) diff --git a/tests/unit/engines/test_base.py b/tests/unit/engines/test_base.py index b5da5bde76f616661ad1df15f9a7f7e10464c746..f6e7cb6c5de762036f527642f1185697380ca66e 100644 --- a/tests/unit/engines/test_base.py +++ b/tests/unit/engines/test_base.py @@ -21,10 +21,10 @@ class TestBaseEngine(SearxTestCase): self.assertRaises(AttributeError, base.response, '') self.assertRaises(AttributeError, base.response, '[]') - response = mock.Mock(content=b'') + response = mock.Mock(content='') self.assertEqual(base.response(response), []) - xml_mock = b""" + xml_mock = """ 0 diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py index 48a5e744a8ba3ca654ae980d22a8ce255ce03406..34989d8db0714dafcf45ca4c86dcb293f5ef2547 100644 --- a/tests/unit/engines/test_bing.py +++ b/tests/unit/engines/test_bing.py @@ -8,11 +8,11 @@ class TestBingEngine(SearxTestCase): def test_request(self): bing.supported_languages = ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR'] - query = u'test_query' + query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 dicto['language'] = 'fr-FR' - params = bing.request(query.encode('utf-8'), dicto) + params = bing.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) self.assertTrue('language%3AFR' in params['url']) diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py index 8fc26ee3268fad50003815428a744239e9ce18ae..e5f80c00bc9541798fca74f6ef180f87df8a73d2 100644 --- a/tests/unit/engines/test_bing_news.py +++ b/tests/unit/engines/test_bing_news.py @@ -71,7 +71,7 @@ class TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(content=html.encode('utf-8')) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -110,7 +110,7 @@ class TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(content=html.encode('utf-8')) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -133,11 +133,11 @@ class 
TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(content=html.encode('utf-8')) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) html = """gabarge""" - response = mock.Mock(content=html.encode('utf-8')) + response = mock.Mock(content=html) self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response) diff --git a/tests/unit/engines/test_btdigg.py b/tests/unit/engines/test_btdigg.py index 6a88e3f755607cacf47415ebe48443ebb0d82f59..4947b71da40946c54169fba2bab040949ac1094d 100644 --- a/tests/unit/engines/test_btdigg.py +++ b/tests/unit/engines/test_btdigg.py @@ -25,7 +25,7 @@ class TestBtdiggEngine(SearxTestCase): response = mock.Mock(text='') self.assertEqual(btdigg.response(response), []) - html = u""" + html = """
@@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
""" - response = mock.Mock(text=html.encode('utf-8')) + response = mock.Mock(text=html) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase): """ - response = mock.Mock(text=html.encode('utf-8')) + response = mock.Mock(text=html) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) - html = u""" + html = """
@@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
""" - response = mock.Mock(text=html.encode('utf-8')) + response = mock.Mock(text=html) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 5) diff --git a/tests/unit/engines/test_currency_convert.py b/tests/unit/engines/test_currency_convert.py index fec19410390c12c51782991ca0fb8f92c8761610..e066fde0849057ee5e1599ab8578c180f5db9f2d 100644 --- a/tests/unit/engines/test_currency_convert.py +++ b/tests/unit/engines/test_currency_convert.py @@ -8,13 +8,13 @@ from searx.testing import SearxTestCase class TestCurrencyConvertEngine(SearxTestCase): def test_request(self): - query = b'test_query' + query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 params = currency_convert.request(query, dicto) self.assertNotIn('url', params) - query = b'convert 10 Pound Sterlings to United States Dollars' + query = 'convert 10 Pound Sterlings to United States Dollars' params = currency_convert.request(query, dicto) self.assertIn('url', params) self.assertIn('duckduckgo.com', params['url']) diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py index 803b5c4d2fe61d93ba39fc00f19ca4c5eac6d45b..63659490e487f1b30a82048e7f10ba6c29a903b5 100644 --- a/tests/unit/engines/test_dailymotion.py +++ b/tests/unit/engines/test_dailymotion.py @@ -103,6 +103,6 @@ class TestDailymotionEngine(SearxTestCase): self.assertEqual(languages['af']['name'], 'Afrikaans') self.assertEqual(languages['af']['english_name'], 'Afrikaans') - self.assertEqual(languages['ar']['name'], u'العربية') + self.assertEqual(languages['ar']['name'], 'العربية') self.assertEqual(languages['ar']['english_name'], 'Arabic') self.assertEqual(languages['la']['english_name'], 'Latin') diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py index 31c2ecabb32adc3c25b491a3c1040319ffbe8f68..3fec6773d48ec51489c083c8f2da7d087de1919a 100644 --- a/tests/unit/engines/test_digbt.py +++ b/tests/unit/engines/test_digbt.py @@ -50,7 +50,7 @@ class TestDigBTEngine(SearxTestCase): """ - response = mock.Mock(text=html.encode('utf-8')) + response = mock.Mock(text=html) results = digbt.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py index 22ddb7a7f5e7df3dc9b52a50365d26eeb6af0ac1..115bdd8e0a61bf83145a34f91c651e56b645140b 100644 --- a/tests/unit/engines/test_doku.py +++ b/tests/unit/engines/test_doku.py @@ -23,7 +23,7 @@ class TestDokuEngine(SearxTestCase): response = mock.Mock(text='') self.assertEqual(doku.response(response), []) - html = u""" + html = """

Pages trouvées :

    @@ -37,7 +37,7 @@ class TestDokuEngine(SearxTestCase): expected = [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}] self.assertEqual(doku.response(response), expected) - html = u""" + html = """
    xvnc: 40 Occurrences trouvées
    er = /usr/bin/Xvnc diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index eb316a4042344b18bfe4750805ebec4578190924..c39bc1bf33d76d4a483b1d71eed25e5a4480da8a 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -53,7 +53,7 @@ class TestDuckduckgoEngine(SearxTestCase): response = mock.Mock(text='') self.assertEqual(duckduckgo.response(response), []) - html = u""" + html = """