Loading searx/engines/duckduckgo.py +75 −41 Original line number Diff line number Diff line Loading @@ -6,7 +6,10 @@ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath from searx import logger import re logger = logger.getChild('ddg engine') # about about = { "website": 'https://duckduckgo.com/', Loading @@ -23,7 +26,7 @@ paging = False supported_languages_url = 'https://duckduckgo.com/util/u172.js' time_range_support = True safesearch = True VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/"; language_aliases = { 'ar-SA': 'ar-XA', 'es-419': 'es-XL', Loading @@ -35,21 +38,14 @@ language_aliases = { } # search-url url = 'https://duckduckgo.com/?q={}' url = 'https://links.duckduckgo.com/d.js?' url_ping = 'https://duckduckgo.com/t/sl_h' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # specific xpath variables result_xpath = '//div[@class="links_main links_deep result__body"]' # noqa url_xpath = '//a[@class="result__snippet"]/@href' title_xpath = '//a[@class="result__a"]' content_xpath = '//a[@class="result__snippet"]' correction_xpath = '//a[@id="js-spelling-recourse-link"]' # match query's language to a region code that duckduckgo will accept def get_region_code(lang, lang_list=None): if lang == 'all': Loading @@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None): # country code goes first return lang_parts[1].lower() + '-' + lang_parts[0].lower() # def get_vqd(query): # resp = requests.get def request(query, params): if params['time_range'] is not None and params['time_range'] not in time_range_dict: return params params['url'] = url.format(query) params['method'] = 'GET' params['data']['q'] = query params['data']['b'] = '' safesearch_ddg_value = None if params['safesearch'] == 0: safesearch_ddg_value = -2 # OFF if params['safesearch'] == 2: safesearch_ddg_value = 1 # STRICT if safesearch_ddg_value is not None: params['cookies']['p'] = str(safesearch_ddg_value) region_code = get_region_code(params['language'], supported_languages) if region_code: params['data']['kl'] = region_code params['cookies']['kl'] = region_code if params['time_range'] in time_range_dict: params['data']['df'] = time_range_dict[params['time_range']] logger.debug(params) query_dict = { "q": query, 't': 'D', 'l': params["language"], 'kl': get_region_code(params["language"]), 's': 0, # TODO 'dl': 'en', 'ct': 'US', 'ss_mkt': get_region_code(params["language"]), 'df': params['time_range'], 'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061", 'ex': -2, 'sp': '1', 'bpa': '1', 'biaexp': 'b', 'msvrtexp': 'b' } if params['safesearch'] == 2: # STRICT del query_dict['t'] query_dict['p'] = 1 query_dict.update({ 'videxp': 'a', 'nadse': 'b', 'eclsexp': 'a', 'stiaexp': 'a', 'tjsexp': 'b', 'related': 'b', 'msnexp': 'a' }) elif params['safesearch'] == 1: # MODERATE query_dict['ex'] = -1 query_dict.update({ 'nadse': 'b', 'eclsexp': 'b', 'tjsexp': 'b' }) else: # OFF query_dict['ex'] = -2 query_dict.update({ 'nadse': 'b', 'eclsexp': 'b', 'tjsexp': 'b' }) params['allow_redirects'] = False params["data"] = query_dict params["url"] = url logger.debug(params) return params Loading @@ -101,22 +128,29 @@ def response(resp): results = [] doc = fromstring(resp.text) data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text)) search_data = loads(data[0].replace('/\t/g', ' ')) if len(search_data) == 1 and ('n' not in search_data[0]): only_result = search_data[0] if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): return titles = eval_xpath(doc, title_xpath) contents = eval_xpath(doc, content_xpath) urls = eval_xpath(doc, url_xpath) for title, content, url in zip(titles, contents, urls): results.append({'title': extract_text(title), 'content': extract_text(content), 'url': url}) for search_result in search_data: if 'n' in search_result: continue results.append({'title': search_result.get("t"), 'content': extract_text(search_result.get('a')), 'url': search_result.get('u')}) # parse correction for correction in eval_xpath(doc, correction_xpath): # append correction results.append({'correction': extract_text(correction)}) # for correction in eval_xpath(doc, correction_xpath): # # append correction # results.append({'correction': extract_text(correction)}) # return results logger.debug(results) return results Loading Loading
searx/engines/duckduckgo.py +75 −41 Original line number Diff line number Diff line Loading @@ -6,7 +6,10 @@ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath from searx import logger import re logger = logger.getChild('ddg engine') # about about = { "website": 'https://duckduckgo.com/', Loading @@ -23,7 +26,7 @@ paging = False supported_languages_url = 'https://duckduckgo.com/util/u172.js' time_range_support = True safesearch = True VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/"; language_aliases = { 'ar-SA': 'ar-XA', 'es-419': 'es-XL', Loading @@ -35,21 +38,14 @@ language_aliases = { } # search-url url = 'https://duckduckgo.com/?q={}' url = 'https://links.duckduckgo.com/d.js?' url_ping = 'https://duckduckgo.com/t/sl_h' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # specific xpath variables result_xpath = '//div[@class="links_main links_deep result__body"]' # noqa url_xpath = '//a[@class="result__snippet"]/@href' title_xpath = '//a[@class="result__a"]' content_xpath = '//a[@class="result__snippet"]' correction_xpath = '//a[@id="js-spelling-recourse-link"]' # match query's language to a region code that duckduckgo will accept def get_region_code(lang, lang_list=None): if lang == 'all': Loading @@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None): # country code goes first return lang_parts[1].lower() + '-' + lang_parts[0].lower() # def get_vqd(query): # resp = requests.get def request(query, params): if params['time_range'] is not None and params['time_range'] not in time_range_dict: return params params['url'] = url.format(query) params['method'] = 'GET' params['data']['q'] = query params['data']['b'] = '' safesearch_ddg_value = None if params['safesearch'] == 0: safesearch_ddg_value = -2 # OFF if params['safesearch'] == 2: safesearch_ddg_value = 1 # STRICT if safesearch_ddg_value is not None: params['cookies']['p'] = str(safesearch_ddg_value) region_code = get_region_code(params['language'], supported_languages) if region_code: params['data']['kl'] = region_code params['cookies']['kl'] = region_code if params['time_range'] in time_range_dict: params['data']['df'] = time_range_dict[params['time_range']] logger.debug(params) query_dict = { "q": query, 't': 'D', 'l': params["language"], 'kl': get_region_code(params["language"]), 's': 0, # TODO 'dl': 'en', 'ct': 'US', 'ss_mkt': get_region_code(params["language"]), 'df': params['time_range'], 'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061", 'ex': -2, 'sp': '1', 'bpa': '1', 'biaexp': 'b', 'msvrtexp': 'b' } if params['safesearch'] == 2: # STRICT del query_dict['t'] query_dict['p'] = 1 query_dict.update({ 'videxp': 'a', 'nadse': 'b', 'eclsexp': 'a', 'stiaexp': 'a', 'tjsexp': 'b', 'related': 'b', 'msnexp': 'a' }) elif params['safesearch'] == 1: # MODERATE query_dict['ex'] = -1 query_dict.update({ 'nadse': 'b', 'eclsexp': 'b', 'tjsexp': 'b' }) else: # OFF query_dict['ex'] = -2 query_dict.update({ 'nadse': 'b', 'eclsexp': 'b', 'tjsexp': 'b' }) params['allow_redirects'] = False params["data"] = query_dict params["url"] = url logger.debug(params) return params Loading @@ -101,22 +128,29 @@ def response(resp): results = [] doc = fromstring(resp.text) data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text)) search_data = loads(data[0].replace('/\t/g', ' ')) if len(search_data) == 1 and ('n' not in search_data[0]): only_result = search_data[0] if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): return titles = eval_xpath(doc, title_xpath) contents = eval_xpath(doc, content_xpath) urls = eval_xpath(doc, url_xpath) for title, content, url in zip(titles, contents, urls): results.append({'title': extract_text(title), 'content': extract_text(content), 'url': url}) for search_result in search_data: if 'n' in search_result: continue results.append({'title': search_result.get("t"), 'content': extract_text(search_result.get('a')), 'url': search_result.get('u')}) # parse correction for correction in eval_xpath(doc, correction_xpath): # append correction results.append({'correction': extract_text(correction)}) # for correction in eval_xpath(doc, correction_xpath): # # append correction # results.append({'correction': extract_text(correction)}) # return results logger.debug(results) return results Loading