Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b562bce5 authored by Nivesh Krishna's avatar Nivesh Krishna Committed by Nivesh Krishna
Browse files

initial version of safe search for ddg engine

parent c70198ae
Loading
Loading
Loading
Loading
+75 −41
Original line number Diff line number Diff line
@@ -6,7 +6,10 @@
from lxml.html import fromstring
from json import loads
from searx.utils import extract_text, match_language, eval_xpath
from searx import logger
import re

logger = logger.getChild('ddg engine')
# about
about = {
    "website": 'https://duckduckgo.com/',
@@ -23,7 +26,7 @@ paging = False
supported_languages_url = 'https://duckduckgo.com/util/u172.js'
time_range_support = True
safesearch = True

# Captures a vqd token (three dash-separated groups of digits) from a
# single-quoted ``vqd='...'`` assignment.  The pattern deliberately has no
# trailing '/': that character is a JavaScript regex delimiter, not part of
# the text to match.
VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"
language_aliases = {
    'ar-SA': 'ar-XA',
    'es-419': 'es-XL',
@@ -35,21 +38,14 @@ language_aliases = {
}

# search-url
url = 'https://links.duckduckgo.com/d.js?'

url_ping = 'https://duckduckgo.com/t/sl_h'

# maps the time-range names accepted in params['time_range'] to the
# one-letter codes sent as the 'df' argument
time_range_dict = {
    'day': 'd',
    'week': 'w',
    'month': 'm',
    'year': 'y',
}

# specific xpath variables
result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
url_xpath = '//a[@class="result__snippet"]/@href'
title_xpath = '//a[@class="result__a"]'
content_xpath = '//a[@class="result__snippet"]'
correction_xpath = '//a[@id="js-spelling-recourse-link"]'


# match query's language to a region code that duckduckgo will accept
def get_region_code(lang, lang_list=None):
    if lang == 'all':
@@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None):
    # country code goes first
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()

# def get_vqd(query):
#     resp = requests.get

def request(query, params):
    """Fill ``params`` with the DuckDuckGo ``d.js`` request for ``query``.

    ``params`` is modified in place and returned.  It must already hold the
    keys ``time_range``, ``safesearch`` and ``language`` plus a ``cookies``
    dict.  On return ``method``, ``data``, ``url`` and ``allow_redirects``
    are set, and the ``p`` / ``kl`` cookies are added where applicable.

    ``params['safesearch']`` selects the filter level: 2 is strict, 1 is
    moderate, any other value is treated as off.

    If ``params['time_range']`` is neither ``None`` nor a key of
    ``time_range_dict``, ``params`` is returned unchanged.
    """
    time_range = params['time_range']
    if time_range is not None and time_range not in time_range_dict:
        return params

    params['method'] = 'GET'

    safesearch_level = params['safesearch']
    # Only the two extremes get a cookie; moderate (1) sends none.
    if safesearch_level == 0:
        params['cookies']['p'] = '-2'  # OFF
    elif safesearch_level == 2:
        params['cookies']['p'] = '1'  # STRICT

    # Resolve the region once, against the engine's language list, so the
    # cookie and the query arguments always carry the same value.
    region_code = get_region_code(params['language'], supported_languages)
    if region_code:
        params['cookies']['kl'] = region_code

    query_dict = {
        'q': query,
        't': 'D',
        'l': params['language'],
        'kl': region_code,
        's': 0,  # TODO
        'dl': 'en',
        'ct': 'US',
        'ss_mkt': region_code,
        # one-letter code from time_range_dict, or None without a time range
        'df': time_range_dict.get(time_range),
        # TODO: hard-coded token; presumably it has to be fetched per query
        # (see the commented-out get_vqd stub) -- confirm before relying on it
        'vqd': "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
        'ex': -2,
        'sp': '1',
        'bpa': '1',
        'biaexp': 'b',
        'msvrtexp': 'b',
    }
    if safesearch_level == 2:  # STRICT
        del query_dict['t']
        query_dict['p'] = 1
        query_dict.update({
            'videxp': 'a',
            'nadse': 'b',
            'eclsexp': 'a',
            'stiaexp': 'a',
            'tjsexp': 'b',
            'related': 'b',
            'msnexp': 'a',
        })
    else:
        if safesearch_level == 1:  # MODERATE
            query_dict['ex'] = -1
        # OFF keeps the default 'ex' of -2
        query_dict.update({
            'nadse': 'b',
            'eclsexp': 'b',
            'tjsexp': 'b',
        })

    params['allow_redirects'] = False
    # NOTE(review): the arguments travel in params['data'] although the
    # method is GET and url ends with '?' -- confirm the caller sends them
    # the way the endpoint expects.
    params['data'] = query_dict
    params['url'] = url
    logger.debug(params)
    return params


@@ -101,22 +128,29 @@ def response(resp):
    results = []

    doc = fromstring(resp.text)
    data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
    search_data = loads(data[0].replace('/\t/g', '    '))

    if len(search_data) == 1 and ('n' not in search_data[0]):
        only_result = search_data[0]
        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
            return
    
    titles = eval_xpath(doc, title_xpath)
    contents = eval_xpath(doc, content_xpath)
    urls = eval_xpath(doc, url_xpath)

    for title, content, url in zip(titles, contents, urls):
        results.append({'title': extract_text(title),
                        'content': extract_text(content),
                        'url': url})
    for search_result in search_data:
        if 'n' in search_result:
            continue
        results.append({'title': search_result.get("t"),
                        'content': extract_text(search_result.get('a')),
                        'url': search_result.get('u')})

    # parse correction
    for correction in eval_xpath(doc, correction_xpath):
        # append correction
        results.append({'correction': extract_text(correction)})
    # for correction in eval_xpath(doc, correction_xpath):
    #     # append correction
    #     results.append({'correction': extract_text(correction)})

    # return results
    logger.debug(results)
    return results