Loading searx/engines/duckduckgo.py +26 −31 Original line number Diff line number Diff line Loading @@ -3,9 +3,8 @@ DuckDuckGo (Web) """ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath from searx.utils import extract_text, match_language from searx import logger import re import httpx Loading @@ -27,7 +26,7 @@ paging = False supported_languages_url = 'https://duckduckgo.com/util/u172.js' time_range_support = True safesearch = True VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"; VQD_REGEX = r"vqd='(\d+-\d+-\d+)'" language_aliases = { 'ar-SA': 'ar-XA', 'es-419': 'es-XL', Loading @@ -47,6 +46,7 @@ time_range_dict = {'day': 'd', 'month': 'm', 'year': 'y'} # match query's language to a region code that duckduckgo will accept def get_region_code(lang, lang_list=None): if lang == 'all': Loading @@ -58,11 +58,13 @@ def get_region_code(lang, lang_list=None): # country code goes first return lang_parts[1].lower() + '-' + lang_parts[0].lower() def get_vqd(query): resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web") resp = re.findall(VQD_REGEX, resp.text) return resp[0] def request(query, params): if params['time_range'] is not None and params['time_range'] not in time_range_dict: return params Loading @@ -75,7 +77,7 @@ def request(query, params): 't': 'D', 'l': params["language"], 'kl': get_region_code(params["language"]), 's': 0, # TODO 's': 0, 'dl': 'en', 'ct': 'US', 'ss_mkt': get_region_code(params["language"]), Loading Loading @@ -128,28 +130,21 @@ def response(resp): # parse the response results = [] doc = fromstring(resp.text) data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text)) search_data = loads(data[0].replace('/\t/g', ' ')) if len(search_data) == 1 and ('n' not in search_data[0]): only_result = search_data[0] if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): return for search_result in search_data: if 'n' in search_result: continue results.append({'title': search_result.get("t"), 'content': extract_text(search_result.get('a')), 'url': search_result.get('u')}) # parse correction # for correction in eval_xpath(doc, correction_xpath): # # append correction # results.append({'correction': extract_text(correction)}) return results Loading Loading
searx/engines/duckduckgo.py +26 −31 Original line number Diff line number Diff line Loading @@ -3,9 +3,8 @@ DuckDuckGo (Web) """ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath from searx.utils import extract_text, match_language from searx import logger import re import httpx Loading @@ -27,7 +26,7 @@ paging = False supported_languages_url = 'https://duckduckgo.com/util/u172.js' time_range_support = True safesearch = True VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"; VQD_REGEX = r"vqd='(\d+-\d+-\d+)'" language_aliases = { 'ar-SA': 'ar-XA', 'es-419': 'es-XL', Loading @@ -47,6 +46,7 @@ time_range_dict = {'day': 'd', 'month': 'm', 'year': 'y'} # match query's language to a region code that duckduckgo will accept def get_region_code(lang, lang_list=None): if lang == 'all': Loading @@ -58,11 +58,13 @@ def get_region_code(lang, lang_list=None): # country code goes first return lang_parts[1].lower() + '-' + lang_parts[0].lower() def get_vqd(query): resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web") resp = re.findall(VQD_REGEX, resp.text) return resp[0] def request(query, params): if params['time_range'] is not None and params['time_range'] not in time_range_dict: return params Loading @@ -75,7 +77,7 @@ def request(query, params): 't': 'D', 'l': params["language"], 'kl': get_region_code(params["language"]), 's': 0, # TODO 's': 0, 'dl': 'en', 'ct': 'US', 'ss_mkt': get_region_code(params["language"]), Loading Loading @@ -128,28 +130,21 @@ def response(resp): # parse the response results = [] doc = fromstring(resp.text) data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text)) search_data = loads(data[0].replace('/\t/g', ' ')) if len(search_data) == 1 and ('n' not in search_data[0]): only_result = search_data[0] if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'): return for search_result in search_data: if 'n' in search_result: continue results.append({'title': search_result.get("t"), 'content': extract_text(search_result.get('a')), 'url': search_result.get('u')}) # parse correction # for correction in eval_xpath(doc, correction_xpath): # # append correction # results.append({'correction': extract_text(correction)}) return results Loading