Loading requirements.txt +5 −5 Original line number Diff line number Diff line redis certifi==2017.11.5 flask==0.12.2 flask==1.0.2 flask-babel==0.11.2 lxml==4.1.1 idna==2.5 lxml==4.2.1 idna==2.6 pygments==2.1.3 pyopenssl==17.4.0 python-dateutil==2.6.1 pyopenssl==18.0.0 python-dateutil==2.7.3 pyyaml==3.12 requests[socks]==2.18.4 searx/engines/base.py +1 −1 Original line number Diff line number Diff line Loading @@ -55,7 +55,7 @@ shorcut_dict = { def request(query, params): # replace shortcuts with API advanced search keywords for key in shorcut_dict.keys(): query = re.sub(str(key), str(shorcut_dict[key]), query) query = re.sub(key, shorcut_dict[key], str(query)) # basic search offset = (params['pageno'] - 1) * number_of_results Loading searx/engines/bing.py +4 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urlencode from searx.utils import match_language from searx.utils import match_language, gen_useragent # engine dependent config categories = ['general'] Loading @@ -43,6 +43,9 @@ def request(query, params): offset=offset) params['url'] = base_url + search_path params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64') return params Loading searx/engines/gigablast.py +2 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ search_string = 'search?{query}'\ '&qlang={lang}'\ '&ff={safesearch}'\ '&rxiec={rxieu}'\ '&ulse={ulse}'\ '&rand={rxikd}' # current unix timestamp # specific xpath variables Loading Loading @@ -64,6 +65,7 @@ def request(query, params): number_of_results=number_of_results, rxikd=int(time() * 1000), rxieu=random.randint(1000000000, 9999999999), ulse=random.randint(100000000, 999999999), lang=language, safesearch=safesearch) Loading searx/engines/google_images.py +16 −25 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ from datetime import date, timedelta from json import loads from lxml import html from searx.url_utils import urlencode from searx.url_utils import urlencode, urlparse, parse_qs # engine dependent config Loading @@ -25,10 +25,9 @@ number_of_results = 100 search_url = 'https://www.google.com/search'\ '?{query}'\ '&asearch=ichunk'\ '&async=_id:rg_s,_pms:s'\ '&tbm=isch'\ '&yv=2'\ '&gbv=1'\ '&sa=G'\ '&{search_options}' time_range_attr = "qdr:{range}" time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}" Loading Loading @@ -66,30 +65,22 @@ def request(query, params): def response(resp): results = [] g_result = loads(resp.text) dom = html.fromstring(g_result[1][1]) dom = html.fromstring(resp.text) # parse results for result in dom.xpath('//div[@data-ved]'): try: metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()'))) except: continue thumbnail_src = metadata['tu'] # http to https thumbnail_src = thumbnail_src.replace("http://", "https://") for img in dom.xpath('//a'): r = { 'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')), 'content': '', 'template': 'images.html', } url = urlparse(img.xpath('.//@href')[0]) query = parse_qs(url.query) r['url'] = query['imgrefurl'][0] r['img_src'] = query['imgurl'][0] r['thumbnail_src'] = r['img_src'] # append result results.append({'url': metadata['ru'], 'title': metadata['pt'], 'content': metadata['s'], 'thumbnail_src': thumbnail_src, 'img_src': metadata['ou'], 'template': 'images.html'}) results.append(r) # return results return results Loading
requirements.txt +5 −5 Original line number Diff line number Diff line redis certifi==2017.11.5 flask==0.12.2 flask==1.0.2 flask-babel==0.11.2 lxml==4.1.1 idna==2.5 lxml==4.2.1 idna==2.6 pygments==2.1.3 pyopenssl==17.4.0 python-dateutil==2.6.1 pyopenssl==18.0.0 python-dateutil==2.7.3 pyyaml==3.12 requests[socks]==2.18.4
searx/engines/base.py +1 −1 Original line number Diff line number Diff line Loading @@ -55,7 +55,7 @@ shorcut_dict = { def request(query, params): # replace shortcuts with API advanced search keywords for key in shorcut_dict.keys(): query = re.sub(str(key), str(shorcut_dict[key]), query) query = re.sub(key, shorcut_dict[key], str(query)) # basic search offset = (params['pageno'] - 1) * number_of_results Loading
searx/engines/bing.py +4 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urlencode from searx.utils import match_language from searx.utils import match_language, gen_useragent # engine dependent config categories = ['general'] Loading @@ -43,6 +43,9 @@ def request(query, params): offset=offset) params['url'] = base_url + search_path params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64') return params Loading
searx/engines/gigablast.py +2 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ search_string = 'search?{query}'\ '&qlang={lang}'\ '&ff={safesearch}'\ '&rxiec={rxieu}'\ '&ulse={ulse}'\ '&rand={rxikd}' # current unix timestamp # specific xpath variables Loading Loading @@ -64,6 +65,7 @@ def request(query, params): number_of_results=number_of_results, rxikd=int(time() * 1000), rxieu=random.randint(1000000000, 9999999999), ulse=random.randint(100000000, 999999999), lang=language, safesearch=safesearch) Loading
searx/engines/google_images.py +16 −25 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ from datetime import date, timedelta from json import loads from lxml import html from searx.url_utils import urlencode from searx.url_utils import urlencode, urlparse, parse_qs # engine dependent config Loading @@ -25,10 +25,9 @@ number_of_results = 100 search_url = 'https://www.google.com/search'\ '?{query}'\ '&asearch=ichunk'\ '&async=_id:rg_s,_pms:s'\ '&tbm=isch'\ '&yv=2'\ '&gbv=1'\ '&sa=G'\ '&{search_options}' time_range_attr = "qdr:{range}" time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}" Loading Loading @@ -66,30 +65,22 @@ def request(query, params): def response(resp): results = [] g_result = loads(resp.text) dom = html.fromstring(g_result[1][1]) dom = html.fromstring(resp.text) # parse results for result in dom.xpath('//div[@data-ved]'): try: metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()'))) except: continue thumbnail_src = metadata['tu'] # http to https thumbnail_src = thumbnail_src.replace("http://", "https://") for img in dom.xpath('//a'): r = { 'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')), 'content': '', 'template': 'images.html', } url = urlparse(img.xpath('.//@href')[0]) query = parse_qs(url.query) r['url'] = query['imgrefurl'][0] r['img_src'] = query['imgurl'][0] r['thumbnail_src'] = r['img_src'] # append result results.append({'url': metadata['ru'], 'title': metadata['pt'], 'content': metadata['s'], 'thumbnail_src': thumbnail_src, 'img_src': metadata['ou'], 'template': 'images.html'}) results.append(r) # return results return results