Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0c9cbcd0 authored by Nivesh Krishna's avatar Nivesh Krishna
Browse files

initial version of safe search for ddg engine

parent 2859cfa4
Loading
Loading
Loading
Loading
+75 −41
Original line number Diff line number Diff line
@@ -6,7 +6,10 @@
from lxml.html import fromstring
from json import loads
from searx.utils import extract_text, match_language, eval_xpath
from searx import logger
import re

logger = logger.getChild('ddg engine')
# about
about = {
    "website": 'https://duckduckgo.com/',
@@ -23,7 +26,7 @@ paging = False
# URL kept in `supported_languages_url`; it is not read anywhere in the
# visible part of this module.
supported_languages_url = 'https://duckduckgo.com/util/u172.js'
# Engine capability flags. request() reads params['time_range'] and
# params['safesearch'], which is presumably what these flags advertise to the
# framework -- TODO confirm against the searx engine loader.
time_range_support = True
safesearch = True

# Pattern for a `vqd='<digits>-<digits>-<digits>'` token followed by a literal
# '/'. Not referenced in the visible code.
# NOTE(review): the '/' inside the pattern and the trailing ';' look like
# leftovers from a JavaScript regex literal -- confirm the intended pattern
# before using it.
VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/";
language_aliases = {
    'ar-SA': 'ar-XA',
    'es-419': 'es-XL',
@@ -35,21 +38,14 @@ language_aliases = {
}

# search-url
# NOTE(review): `url` is assigned twice; the second assignment wins. The
# surviving value has no `{}` placeholder, so `url.format(query)` returns it
# unchanged.
url = 'https://duckduckgo.com/?q={}'
url = 'https://links.duckduckgo.com/d.js?'

# Not referenced in the visible part of this module.
url_ping = 'https://duckduckgo.com/t/sl_h'
# Maps the time-range names accepted in params['time_range'] to the
# single-letter codes that request() stores under the 'df' key.
time_range_dict = {'day': 'd',
                   'week': 'w',
                   'month': 'm',
                   'year': 'y'}

# specific xpath variables
# title/content/url/correction expressions are evaluated with eval_xpath() in
# the response parsing code; result_xpath is not referenced in the visible code.
result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
url_xpath = '//a[@class="result__snippet"]/@href'
title_xpath = '//a[@class="result__a"]'
content_xpath = '//a[@class="result__snippet"]'
correction_xpath = '//a[@id="js-spelling-recourse-link"]'


# match query's language to a region code that duckduckgo will accept
def get_region_code(lang, lang_list=None):
    if lang == 'all':
@@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None):
    # country code goes first
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()

# def get_vqd(query):
#     resp = requests.get

def request(query, params):
    """Fill ``params`` with the outgoing DuckDuckGo request and return it.

    ``params`` is mutated in place. It is returned untouched when
    ``params['time_range']`` is set to a value that is not a key of
    ``time_range_dict``. Otherwise the function sets ``url``, ``method``,
    ``data``, ``cookies`` entries and ``allow_redirects``.

    NOTE(review): this body appears to contain both the pre-change and the
    post-change lines of a commit. The final assignments to ``params["data"]``
    and ``params["url"]`` replace what the first half of the function wrote
    there; only the cookie writes from the first half survive.
    """
    # Unknown time range: hand params back without configuring a request.
    if params['time_range'] is not None and params['time_range'] not in time_range_dict:
        return params

    # These url/data writes are overwritten at the end of the function.
    params['url'] = url.format(query)
    params['method'] = 'GET'
    params['data']['q'] = query
    params['data']['b'] = ''

    # Translate the safesearch level into the value stored in the 'p' cookie.
    # Level 1 leaves the value as None, so no cookie is set for it.
    safesearch_ddg_value = None
    if params['safesearch'] == 0:
        safesearch_ddg_value = -2  # OFF
    if params['safesearch'] == 2:
        safesearch_ddg_value = 1  # STRICT

    if safesearch_ddg_value is not None:
        params['cookies']['p'] = str(safesearch_ddg_value)

    # Region code goes into both the form data and the cookies; only the
    # cookie entry is still present when the function returns.
    region_code = get_region_code(params['language'], supported_languages)
    if region_code:
        params['data']['kl'] = region_code
        params['cookies']['kl'] = region_code

    # Here the time range is mapped to its single-letter code.
    if params['time_range'] in time_range_dict:
        params['data']['df'] = time_range_dict[params['time_range']]
    logger.debug(params)

    # Parameter set that ends up as params["data"].
    # NOTE(review): get_region_code() is called here without a language list,
    # unlike the call above that passes supported_languages.
    # NOTE(review): 'df' receives the raw params['time_range'] value (possibly
    # None), not the mapped code from time_range_dict -- confirm which one the
    # endpoint expects.
    # NOTE(review): 'vqd' is a hard-coded literal; presumably it should be
    # obtained per query -- confirm.
    query_dict = {
        "q": query,
        't': 'D',
        'l': params["language"],
        'kl': get_region_code(params["language"]),
        's': 0, # TODO
        'dl': 'en',
        'ct': 'US',
        'ss_mkt': get_region_code(params["language"]),
        'df': params['time_range'],
        'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
        'ex': -2,
        'sp': '1',
        'bpa': '1',
        'biaexp': 'b',
        'msvrtexp': 'b'
    }
    # Per-level adjustments. STRICT drops 't', adds 'p' = 1 and leaves 'ex' at
    # its initial -2; MODERATE sets 'ex' to -1; any other level re-assigns the
    # -2 that 'ex' already holds.
    if params['safesearch'] == 2: # STRICT
        del query_dict['t']
        query_dict['p'] = 1
        query_dict.update({
                'videxp': 'a',
                'nadse': 'b',
                'eclsexp': 'a',
                'stiaexp': 'a',
                'tjsexp': 'b',
                'related': 'b',
                'msnexp': 'a'
            })
    elif params['safesearch'] == 1: # MODERATE
        query_dict['ex'] = -1
        query_dict.update({
                'nadse': 'b',
                'eclsexp': 'b',
                'tjsexp': 'b'
        })
    else: # OFF
        query_dict['ex'] = -2
        query_dict.update({
                'nadse': 'b',
                'eclsexp': 'b',
                'tjsexp': 'b'
        })

    params['allow_redirects'] = False
    # Replace, rather than merge into, the data dict and the url set earlier.
    params["data"] = query_dict
    params["url"] = url
    logger.debug(params)
    return params


@@ -101,22 +128,29 @@ def response(resp):
    results = []

    doc = fromstring(resp.text)
    data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
    search_data = loads(data[0].replace('/\t/g', '    '))

    if len(search_data) == 1 and ('n' not in search_data[0]):
        only_result = search_data[0]
        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
            return
    
    titles = eval_xpath(doc, title_xpath)
    contents = eval_xpath(doc, content_xpath)
    urls = eval_xpath(doc, url_xpath)

    for title, content, url in zip(titles, contents, urls):
        results.append({'title': extract_text(title),
                        'content': extract_text(content),
                        'url': url})
    for search_result in search_data:
        if 'n' in search_result:
            continue
        results.append({'title': search_result.get("t"),
                        'content': extract_text(search_result.get('a')),
                        'url': search_result.get('u')})

    # parse correction
    for correction in eval_xpath(doc, correction_xpath):
        # append correction
        results.append({'correction': extract_text(correction)})
    # for correction in eval_xpath(doc, correction_xpath):
    #     # append correction
    #     results.append({'correction': extract_text(correction)})

    # return results
    logger.debug(results)
    return results