From 05a73019b16f21d9c1d45fe7d5add838364cce3c Mon Sep 17 00:00:00 2001 From: Venca24 Date: Mon, 4 Feb 2019 16:31:17 +0100 Subject: [PATCH 01/32] [fix] google scholar suggestions --- searx/settings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index ff7782b65..c1a907ae1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -296,7 +296,7 @@ engines: url_xpath : .//h3/a/@href title_xpath : .//h3/a content_xpath : .//div[@class="gs_rs"] - suggestion_xpath : //div[@id="gs_qsuggest"]/ul/li + suggestion_xpath : //div[@id="gs_res_ccl_top"]//a/b page_size : 10 first_page_num : 0 categories : science -- GitLab From f7bdd827c4cfd92fe182d5806f1e2c35352feed9 Mon Sep 17 00:00:00 2001 From: Jonas Zohren Date: Wed, 13 Feb 2019 00:37:29 +0100 Subject: [PATCH 02/32] [enh] adds apkmirror search engine --- searx/engines/apkmirror.py | 61 ++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 6 ++++ 2 files changed, 67 insertions(+) create mode 100644 searx/engines/apkmirror.py diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py new file mode 100644 index 000000000..f2ee12b29 --- /dev/null +++ b/searx/engines/apkmirror.py @@ -0,0 +1,61 @@ +""" + APK Mirror + + @website https://www.apkmirror.com + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, thumbnail_src +""" + +from lxml import html +from searx.engines.xpath import extract_text +from searx.url_utils import urlencode + +# engine dependent config +categories = ['it'] +paging = True + +# I am not 100% certain about this, as apkmirror appears to be a wordpress site, +# which might support time_range searching. If you want to implement it, go ahead. +time_range_support = False + +# search-url +base_url = 'https://www.apkmirror.com' +search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}' + + +# do search-request +def request(query, params): + + params['url'] = search_url.format(pageno=params['pageno'], + query=urlencode({'s': query})) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath('.//div[@id="content"]/div[@class="listWidget"]/div[@class="appRow"]'): + + link = result.xpath('.//h5/a')[0] + url = base_url + link.attrib.get('href') + '#downloads' + title = extract_text(link) + thumbnail_src = base_url + result.xpath('.//img')[0].attrib.get('src').replace('&w=32&h=32', '&w=64&h=64') + + res = { + 'url': url, + 'title': title, + 'thumbnail_src': thumbnail_src + } + + # append result + results.append(res) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml index ff7782b65..3851f49d5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -49,6 +49,12 @@ outgoing: # communication with search engines # - 1.1.1.2 engines: + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: True + - name : arch linux wiki engine : archlinux shortcut : al -- GitLab From 2478c5395d5504529e5d4b8ee09092fedbc71fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 7 Jan 2019 21:06:53 +0100 Subject: [PATCH 03/32] update pyyaml --- requirements.txt | 2 +- searx/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index fd61b8109..701e08de9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ idna==2.7 pygments==2.1.3 pyopenssl==18.0.0 python-dateutil==2.7.3 -pyyaml==3.13 +pyyaml==4.2b1 requests[socks]==2.19.1 diff --git a/searx/__init__.py b/searx/__init__.py index b1010f25f..4d7b2a8d3 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -22,7 +22,7 @@ from os.path import realpath, dirname, join, abspath, isfile from io import open from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION try: - from yaml import load + from yaml import safe_load except: from sys import exit, stderr stderr.write('[E] install pyyaml\n') @@ -52,7 +52,7 @@ if not settings_path: # load settings with open(settings_path, 'r', encoding='utf-8') as settings_yaml: - settings = load(settings_yaml) + settings = safe_load(settings_yaml) ''' enable debug if -- GitLab From 30bdc8704c5a90a8911b764d925da9330433391c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 7 Jan 2019 21:07:32 +0100 Subject: [PATCH 04/32] require jinja to have unique function --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 701e08de9..e8a6c33b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ flask==1.0.2 flask-babel==0.11.2 lxml==4.2.3 idna==2.7 +jinja2==2.10 pygments==2.1.3 pyopenssl==18.0.0 python-dateutil==2.7.3 -- GitLab From 252ba92fddad357c820cdc6219377f0fb633c997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 23 Feb 2019 18:53:32 +0100 Subject: [PATCH 05/32] add more updates --- requirements.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index e8a6c33b6..b2ae2d453 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -certifi==2017.11.5 +certifi==2018.11.29 +flask-babel==0.12.2 flask==1.0.2 -flask-babel==0.11.2 -lxml==4.2.3 -idna==2.7 +idna==2.8 jinja2==2.10 +lxml==4.3.0 pygments==2.1.3 -pyopenssl==18.0.0 -python-dateutil==2.7.3 +pyopenssl==19.0.0 +python-dateutil==2.7.5 pyyaml==4.2b1 -requests[socks]==2.19.1 +requests[socks]==2.21.0 -- GitLab From 8039a577a8d0501870c12facc9eba372ed4d1570 Mon Sep 17 00:00:00 2001 From: Elias Ojala Date: Tue, 12 Mar 2019 12:30:47 +0000 Subject: [PATCH 06/32] Use HTTPS for crossref --- searx/settings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index 6e7c37fb0..f346647d1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -127,7 +127,7 @@ engines: - name : crossref engine : json_engine paging : True - search_url : http://search.crossref.org/dois?q={query}&page={pageno} + search_url : https://search.crossref.org/dois?q={query}&page={pageno} url_query : doi title_query : title content_query : fullCitation -- GitLab From f2d49a697124b8f4c6a4df68626b3d29ec959e70 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Tue, 26 Mar 2019 20:33:36 -0600 Subject: [PATCH 07/32] [fix] get youtube results from js object Results are not appearing in the html document anymore, instead they are found inside an object embedded in a script. --- searx/engines/youtube_noapi.py | 70 +++++----- tests/unit/engines/test_youtube_noapi.py | 162 ++++++++--------------- 2 files changed, 90 insertions(+), 142 deletions(-) diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 9f01841f6..3bf25932b 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -8,7 +8,8 @@ # @stable no # @parse url, title, content, publishedDate, thumbnail, embedded -from lxml import html +from functools import reduce +from json import loads from searx.engines.xpath import extract_text from searx.utils import list_get from searx.url_utils import quote_plus @@ -34,20 +35,6 @@ embedded_url = '