Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0177917 authored by Nivesh Krishna's avatar Nivesh Krishna
Browse files

fix ddg

parent 007d2e21
Loading
Loading
Loading
Loading
+0 −1
Original line number Original line Diff line number Diff line
@@ -19,5 +19,4 @@ sphinx-autobuild==2021.3.14
linuxdoc==20211220
linuxdoc==20211220
aiounittest==1.4.1
aiounittest==1.4.1
numexpr==2.8.1
numexpr==2.8.1
werkzeug==2.0.3
wrapt-timeout-decorator==1.3.8
wrapt-timeout-decorator==1.3.8
+0 −1
Original line number Original line Diff line number Diff line
@@ -19,7 +19,6 @@ setproctitle==1.3.1
redis==3.4.1
redis==3.4.1
ring==0.7.3
ring==0.7.3
numexpr==2.8.1
numexpr==2.8.1
werkzeug==2.0.3
wrapt-timeout-decorator==1.3.8
wrapt-timeout-decorator==1.3.8
pyyaml==6.0
pyyaml==6.0
requests[socks]==2.28.1
requests[socks]==2.28.1
+28 −48
Original line number Original line Diff line number Diff line
@@ -13,7 +13,7 @@ from searx.network import get
logger = logger.getChild('ddg engine')
logger = logger.getChild('ddg engine')
# about
# about
about = {
about = {
    "website": 'https://lite.duckduckgo.com/lite',
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": False,
    "use_official_api": False,
@@ -22,11 +22,13 @@ about = {
}
}


# engine dependent config
# engine dependent config
categories = ['general', 'web']
categories = ['general']
paging = True
paging = True
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
supported_languages_url = 'https://duckduckgo.com/util/u172.js'
number_of_results = 10
time_range_support = True
time_range_support = True

safesearch = True
VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"
language_aliases = {
language_aliases = {
    'ca-ES': 'ct-ca',
    'ca-ES': 'ct-ca',
    'de-AT': 'de-de',
    'de-AT': 'de-de',
@@ -43,14 +45,16 @@ language_aliases = {
    'ko': 'kr-KR',
    'ko': 'kr-KR',
    'sl-SI': 'sl-SL',
    'sl-SI': 'sl-SL',
    'zh-TW': 'tzh-TW',
    'zh-TW': 'tzh-TW',
    'zh-HK': 'tzh-HK',
    'zh-HK': 'tzh-HK'
}
}


time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}

# search-url
# search-url
url = 'https://lite.duckduckgo.com/lite'
url = 'https://links.duckduckgo.com/d.js?'
url_ping = 'https://duckduckgo.com/t/sl_l'
url_ping = 'https://duckduckgo.com/t/sl_h'
time_range_dict = {'day': 'd',
                   'week': 'w',
                   'month': 'm',
                   'year': 'y'}




# match query's language to a region code that duckduckgo will accept
# match query's language to a region code that duckduckgo will accept
@@ -65,16 +69,15 @@ def get_region_code(lang, lang_list=None):
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()




def request(query, params):
def get_vqd(query, headers):
    resp = get(f"https://duckduckgo.com/?q={query}&ia=web", headers=headers)
    resp = re.findall(VQD_REGEX, resp.text)
    return resp[0]


    params['url'] = url
    params['method'] = 'POST'


    params['data']['q'] = query
def request(query, params):


    # The API is not documented, so we do some reverse engineering and emulate
    params['method'] = 'GET'
    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
    # link again and again ..


    vqd = get_vqd(query, params["headers"])
    vqd = get_vqd(query, params["headers"])
    dl, ct = match_language(params["language"], supported_languages, language_aliases, 'wt-WT').split("-")
    dl, ct = match_language(params["language"], supported_languages, language_aliases, 'wt-WT').split("-")
@@ -134,40 +137,17 @@ def request(query, params):


# get response from search-request
# get response from search-request
def response(resp):
def response(resp):

    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
    get(url_ping, headers=headers_ping)

    if resp.status_code == 303:
    if resp.status_code == 303:
        return []
        return []


    # parse the response
    results = []
    results = []
    doc = fromstring(resp.text)

    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
    if not len(result_table) >= 3:
        # no more results
        return []
    result_table = result_table[2]


    tr_rows = eval_xpath(result_table, './/tr')
    data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))

    try:
    # In the last <tr> is the form of the 'previous/next page' links
        search_data = loads(data[0].replace('/\t/g', '    '))
    tr_rows = tr_rows[:-1]
    except IndexError:

        return
    len_tr_rows = len(tr_rows)
    offset = 0

    while len_tr_rows >= offset + 4:

        # assemble table rows we need to scrap
        tr_title = tr_rows[offset]
        tr_content = tr_rows[offset + 1]
        offset += 4

        # ignore sponsored Adds <tr class="result-sponsored">
        if tr_content.get('class') == 'result-sponsored':
            continue


    if len(search_data) == 1 and ('n' not in search_data[0]):
    if len(search_data) == 1 and ('n' not in search_data[0]):
        only_result = search_data[0]
        only_result = search_data[0]
@@ -175,8 +155,8 @@ def response(resp):
                only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
                only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
            return
            return


        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
    for search_result in search_data:
        if td_content is None:
        if 'n' in search_result:
            continue
            continue


        title = HTMLTextExtractor()
        title = HTMLTextExtractor()