From fa4eb51c603970bc796b6884e1882665ecc6e7a4 Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Thu, 23 Dec 2021 16:41:53 -0300
Subject: [PATCH 01/20] Implementing ddg main search engine

---
 searx/engines/duckduckgo.py      | 140 +++++++----------------
 searx/engines/duckduckgo_lite.py | 186 +++++++++++++++++++++++++++++++
 searx/settings.yml               |   4 +
 3 files changed, 230 insertions(+), 100 deletions(-)
 create mode 100644 searx/engines/duckduckgo_lite.py

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index d871a629f..c8653e916 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,24 +1,16 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# lint: pylint
-"""DuckDuckGo Lite
 """
-
-from json import loads
+ DuckDuckGo (Web)
+"""
 
 from lxml.html import fromstring
-
-from searx.utils import (
-    dict_subset,
-    eval_xpath,
-    eval_xpath_getindex,
-    extract_text,
-    match_language,
-)
+from json import loads
+from searx.utils import extract_text, match_language, eval_xpath, dict_subset
 from searx.network import get
 
 # about
 about = {
-    "website": 'https://lite.duckduckgo.com/lite',
+    "website": 'https://duckduckgo.com/',
     "wikidata_id": 'Q12805',
     "official_api_documentation": 'https://duckduckgo.com/api',
     "use_official_api": False,
@@ -28,8 +20,8 @@ about = {
 
 # engine dependent config
 categories = ['general']
-paging = True
-supported_languages_url = 'https://duckduckgo.com/util/u588.js'
+paging = False
+supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 
 language_aliases = {
@@ -42,16 +34,20 @@ language_aliases = {
     'zh-HK': 'tzh-HK'
 }
 
-time_range_dict = {
-    'day': 'd',
-    'week': 'w',
-    'month': 'm',
-    'year': 'y'
-}
-
 # search-url
-url = 'https://lite.duckduckgo.com/lite'
-url_ping = 'https://duckduckgo.com/t/sl_l'
+url = 'https://html.duckduckgo.com/html/?q={}'
+url_ping = 'https://duckduckgo.com/t/sl_h'
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm',
+                   'year': 'y'}
+
+# specific xpath variables
+result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
+url_xpath = '//a[@class="result__snippet"]/@href'
+title_xpath = '//a[@class="result__a"]'
+content_xpath = '//a[@class="result__snippet"]'
+correction_xpath = '//a[@id="js-spelling-recourse-link"]'
 
 
 # match query's language to a region code that duckduckgo will accept
@@ -67,108 +63,52 @@ def get_region_code(lang, lang_list=None):
 
 
 def request(query, params):
+    if params['time_range'] is not None and params['time_range'] not in time_range_dict:
+        return params
 
-    params['url'] = url
+    params['url'] = url.format(query)
     params['method'] = 'POST'
-
     params['data']['q'] = query
-
-    # The API is not documented, so we do some reverse engineering and emulate
-    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
-    # link again and again ..
-
-    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
-
-    # initial page does not have an offset
-    if params['pageno'] == 2:
-        # second page does have an offset of 30
-        offset = (params['pageno'] - 1) * 30
-        params['data']['s'] = offset
-        params['data']['dc'] = offset + 1
-
-    elif params['pageno'] > 2:
-        # third and following pages do have an offset of 30 + n*50
-        offset = 30 + (params['pageno'] - 2) * 50
-        params['data']['s'] = offset
-        params['data']['dc'] = offset + 1
-
-    # initial page does not have additional data in the input form
-    if params['pageno'] > 1:
-        # request the second page (and more pages) needs 'o' and 'api' arguments
-        params['data']['o'] = 'json'
-        params['data']['api'] = 'd.js'
-
-    # initial page does not have additional data in the input form
-    if params['pageno'] > 2:
-        # request the third page (and more pages) some more arguments
-        params['data']['nextParams'] = ''
-        params['data']['v'] = ''
-        params['data']['vqd'] = ''
+    params['data']['b'] = ''
 
     region_code = get_region_code(params['language'], supported_languages)
     if region_code:
         params['data']['kl'] = region_code
         params['cookies']['kl'] = region_code
 
-    params['data']['df'] = ''
     if params['time_range'] in time_range_dict:
         params['data']['df'] = time_range_dict[params['time_range']]
-        params['cookies']['df'] = time_range_dict[params['time_range']]
 
+    params['allow_redirects'] = False
     return params
 
 
 # get response from search-request
 def response(resp):
-
-    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
-    get(url_ping, headers=headers_ping)
-
     if resp.status_code == 303:
         return []
 
+    # parse the response
     results = []
     doc = fromstring(resp.text)
+    
+    titles = eval_xpath(doc, title_xpath)
+    contents = eval_xpath(doc, content_xpath)
+    urls  = eval_xpath(doc, url_xpath)
 
-    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
-    if not len(result_table) >= 3:
-        # no more results
-        return []
-    result_table = result_table[2]
-
-    tr_rows = eval_xpath(result_table, './/tr')
-
-    # In the last <tr> is the form of the 'previous/next page' links
-    tr_rows = tr_rows[:-1]
-
-    len_tr_rows = len(tr_rows)
-    offset = 0
-
-    while len_tr_rows >= offset + 4:
-
-        # assemble table rows we need to scrap
-        tr_title = tr_rows[offset]
-        tr_content = tr_rows[offset + 1]
-        offset += 4
-
-        # ignore sponsored Adds <tr class="result-sponsored">
-        if tr_content.get('class') == 'result-sponsored':
-            continue
-
-        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
-        if a_tag is None:
-            continue
+    for title, content, url in zip(titles, contents, urls):
+        print(extract_text(content))
 
-        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
-        if td_content is None:
-            continue
+        results.append({'title': extract_text(title),
+                        'content': extract_text(content),
+                        'url': url})
 
-        results.append({
-            'title': a_tag.text_content(),
-            'content': extract_text(td_content),
-            'url': a_tag.get('href'),
-        })
+    # parse correction
+    for correction in eval_xpath(doc, correction_xpath):
+        # append correction
+        results.append({'correction': extract_text(correction)})
 
+    # return results
     return results
 
 
diff --git a/searx/engines/duckduckgo_lite.py b/searx/engines/duckduckgo_lite.py
new file mode 100644
index 000000000..d871a629f
--- /dev/null
+++ b/searx/engines/duckduckgo_lite.py
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""DuckDuckGo Lite
+"""
+
+from json import loads
+
+from lxml.html import fromstring
+
+from searx.utils import (
+    dict_subset,
+    eval_xpath,
+    eval_xpath_getindex,
+    extract_text,
+    match_language,
+)
+from searx.network import get
+
+# about
+about = {
+    "website": 'https://lite.duckduckgo.com/lite',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+# engine dependent config
+categories = ['general']
+paging = True
+supported_languages_url = 'https://duckduckgo.com/util/u588.js'
+time_range_support = True
+
+language_aliases = {
+    'ar-SA': 'ar-XA',
+    'es-419': 'es-XL',
+    'ja': 'jp-JP',
+    'ko': 'kr-KR',
+    'sl-SI': 'sl-SL',
+    'zh-TW': 'tzh-TW',
+    'zh-HK': 'tzh-HK'
+}
+
+time_range_dict = {
+    'day': 'd',
+    'week': 'w',
+    'month': 'm',
+    'year': 'y'
+}
+
+# search-url
+url = 'https://lite.duckduckgo.com/lite'
+url_ping = 'https://duckduckgo.com/t/sl_l'
+
+
+# match query's language to a region code that duckduckgo will accept
+def get_region_code(lang, lang_list=None):
+    if lang == 'all':
+        return None
+
+    lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')
+    lang_parts = lang_code.split('-')
+
+    # country code goes first
+    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
+
+
+def request(query, params):
+
+    params['url'] = url
+    params['method'] = 'POST'
+
+    params['data']['q'] = query
+
+    # The API is not documented, so we do some reverse engineering and emulate
+    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
+    # link again and again ..
+
+    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
+
+    # initial page does not have an offset
+    if params['pageno'] == 2:
+        # second page does have an offset of 30
+        offset = (params['pageno'] - 1) * 30
+        params['data']['s'] = offset
+        params['data']['dc'] = offset + 1
+
+    elif params['pageno'] > 2:
+        # third and following pages do have an offset of 30 + n*50
+        offset = 30 + (params['pageno'] - 2) * 50
+        params['data']['s'] = offset
+        params['data']['dc'] = offset + 1
+
+    # initial page does not have additional data in the input form
+    if params['pageno'] > 1:
+        # request the second page (and more pages) needs 'o' and 'api' arguments
+        params['data']['o'] = 'json'
+        params['data']['api'] = 'd.js'
+
+    # initial page does not have additional data in the input form
+    if params['pageno'] > 2:
+        # request the third page (and more pages) some more arguments
+        params['data']['nextParams'] = ''
+        params['data']['v'] = ''
+        params['data']['vqd'] = ''
+
+    region_code = get_region_code(params['language'], supported_languages)
+    if region_code:
+        params['data']['kl'] = region_code
+        params['cookies']['kl'] = region_code
+
+    params['data']['df'] = ''
+    if params['time_range'] in time_range_dict:
+        params['data']['df'] = time_range_dict[params['time_range']]
+        params['cookies']['df'] = time_range_dict[params['time_range']]
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+
+    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
+    get(url_ping, headers=headers_ping)
+
+    if resp.status_code == 303:
+        return []
+
+    results = []
+    doc = fromstring(resp.text)
+
+    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
+    if not len(result_table) >= 3:
+        # no more results
+        return []
+    result_table = result_table[2]
+
+    tr_rows = eval_xpath(result_table, './/tr')
+
+    # In the last <tr> is the form of the 'previous/next page' links
+    tr_rows = tr_rows[:-1]
+
+    len_tr_rows = len(tr_rows)
+    offset = 0
+
+    while len_tr_rows >= offset + 4:
+
+        # assemble table rows we need to scrap
+        tr_title = tr_rows[offset]
+        tr_content = tr_rows[offset + 1]
+        offset += 4
+
+        # ignore sponsored Adds <tr class="result-sponsored">
+        if tr_content.get('class') == 'result-sponsored':
+            continue
+
+        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
+        if a_tag is None:
+            continue
+
+        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
+        if td_content is None:
+            continue
+
+        results.append({
+            'title': a_tag.text_content(),
+            'content': extract_text(td_content),
+            'url': a_tag.get('href'),
+        })
+
+    return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+    # response is a js file with regions as an embedded object
+    response_page = resp.text
+    response_page = response_page[response_page.find('regions:{') + 8:]
+    response_page = response_page[:response_page.find('}') + 1]
+
+    regions_json = loads(response_page)
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+    return list(supported_languages)
diff --git a/searx/settings.yml b/searx/settings.yml
index 92be3fbeb..b2341f84e 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -408,6 +408,10 @@ engines:
     engine : duckduckgo
     shortcut : ddg
 
+  - name : duckduckgo (lite)
+    engine : duckduckgo_lite
+    shortcut : ddgl
+
   - name : duckduckgo images
     engine : duckduckgo_images
     shortcut : ddi
-- 
GitLab


From 009e75abe99b2c75d37b2994f0ede13e4c8bf0a1 Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:12:48 -0300
Subject: [PATCH 02/20] Update ddg url and add safesearch

---
 searx/engines/duckduckgo.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index c8653e916..f93c7815f 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -23,6 +23,7 @@ categories = ['general']
 paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
+safesearch = True
 
 language_aliases = {
     'ar-SA': 'ar-XA',
@@ -35,7 +36,7 @@ language_aliases = {
 }
 
 # search-url
-url = 'https://html.duckduckgo.com/html/?q={}'
+url = 'https://duckduckgo.com/?q={}'
 url_ping = 'https://duckduckgo.com/t/sl_h'
 time_range_dict = {'day': 'd',
                    'week': 'w',
@@ -67,10 +68,19 @@ def request(query, params):
         return params
 
     params['url'] = url.format(query)
-    params['method'] = 'POST'
+    params['method'] = 'GET'
     params['data']['q'] = query
     params['data']['b'] = ''
 
+    safesearch_ddg_value = None
+    if params['safesearch'] == 0:
+        safesearch_ddg_value = -2 # OFF
+    if params['safesearch'] == 2:
+        safesearch_ddg_value = 1 # STRICT
+
+    if safesearch_ddg_value != None:
+        params['cookies']['p'] = str(safesearch_ddg_value)
+
     region_code = get_region_code(params['language'], supported_languages)
     if region_code:
         params['data']['kl'] = region_code
@@ -90,15 +100,14 @@ def response(resp):
 
     # parse the response
     results = []
+
     doc = fromstring(resp.text)
-    
+
     titles = eval_xpath(doc, title_xpath)
     contents = eval_xpath(doc, content_xpath)
-    urls  = eval_xpath(doc, url_xpath)
+    urls = eval_xpath(doc, url_xpath)
 
     for title, content, url in zip(titles, contents, urls):
-        print(extract_text(content))
-
         results.append({'title': extract_text(title),
                         'content': extract_text(content),
                         'url': url})
-- 
GitLab


From 104d880bf50390503699ef2a951146b233f39992 Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:22:05 -0300
Subject: [PATCH 03/20] Fix small pep8 issues

---
 searx/engines/duckduckgo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index f93c7815f..a1055767f 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -74,11 +74,11 @@ def request(query, params):
 
     safesearch_ddg_value = None
     if params['safesearch'] == 0:
-        safesearch_ddg_value = -2 # OFF
+        safesearch_ddg_value = -2  # OFF
     if params['safesearch'] == 2:
-        safesearch_ddg_value = 1 # STRICT
+        safesearch_ddg_value = 1  # STRICT
 
-    if safesearch_ddg_value != None:
+    if safesearch_ddg_value is not None:
         params['cookies']['p'] = str(safesearch_ddg_value)
 
     region_code = get_region_code(params['language'], supported_languages)
-- 
GitLab


From 2859cfa4930192ae9e5d16978ff0b4bdbe1528dd Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:45:33 -0300
Subject: [PATCH 04/20] Remove unused imports

---
 searx/engines/duckduckgo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index a1055767f..e2b8ea4c8 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -5,8 +5,7 @@
 
 from lxml.html import fromstring
 from json import loads
-from searx.utils import extract_text, match_language, eval_xpath, dict_subset
-from searx.network import get
+from searx.utils import extract_text, match_language, eval_xpath
 
 # about
 about = {
-- 
GitLab


From 0c9cbcd05e227f8880821d9ee455abce2761c6f7 Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Fri, 11 Feb 2022 01:35:33 +0530
Subject: [PATCH 05/20] initial version of safe search for ddg engine

---
 searx/engines/duckduckgo.py | 116 +++++++++++++++++++++++-------------
 1 file changed, 75 insertions(+), 41 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index e2b8ea4c8..80d18c80d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -6,7 +6,10 @@
 from lxml.html import fromstring
 from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
+from searx import logger
+import re
 
+logger = logger.getChild('ddg engine')
 # about
 about = {
     "website": 'https://duckduckgo.com/',
@@ -23,7 +26,7 @@ paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 safesearch = True
-
+VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/";
 language_aliases = {
     'ar-SA': 'ar-XA',
     'es-419': 'es-XL',
@@ -35,21 +38,14 @@ language_aliases = {
 }
 
 # search-url
-url = 'https://duckduckgo.com/?q={}'
+url = 'https://links.duckduckgo.com/d.js?'
+
 url_ping = 'https://duckduckgo.com/t/sl_h'
 time_range_dict = {'day': 'd',
                    'week': 'w',
                    'month': 'm',
                    'year': 'y'}
 
-# specific xpath variables
-result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
-url_xpath = '//a[@class="result__snippet"]/@href'
-title_xpath = '//a[@class="result__a"]'
-content_xpath = '//a[@class="result__snippet"]'
-correction_xpath = '//a[@id="js-spelling-recourse-link"]'
-
-
 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=None):
     if lang == 'all':
@@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
+# def get_vqd(query):
+#     resp = requests.get
 
 def request(query, params):
     if params['time_range'] is not None and params['time_range'] not in time_range_dict:
         return params
 
-    params['url'] = url.format(query)
     params['method'] = 'GET'
-    params['data']['q'] = query
-    params['data']['b'] = ''
-
-    safesearch_ddg_value = None
-    if params['safesearch'] == 0:
-        safesearch_ddg_value = -2  # OFF
-    if params['safesearch'] == 2:
-        safesearch_ddg_value = 1  # STRICT
 
-    if safesearch_ddg_value is not None:
-        params['cookies']['p'] = str(safesearch_ddg_value)
-
-    region_code = get_region_code(params['language'], supported_languages)
-    if region_code:
-        params['data']['kl'] = region_code
-        params['cookies']['kl'] = region_code
-
-    if params['time_range'] in time_range_dict:
-        params['data']['df'] = time_range_dict[params['time_range']]
+    logger.debug(params)
+
+    query_dict = {
+        "q": query,
+        't': 'D',
+        'l': params["language"],
+        'kl': get_region_code(params["language"]),
+        's': 0, # TODO
+        'dl': 'en',
+        'ct': 'US',
+        'ss_mkt': get_region_code(params["language"]),
+        'df': params['time_range'],
+        'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
+        'ex': -2,
+        'sp': '1',
+        'bpa': '1',
+        'biaexp': 'b',
+        'msvrtexp': 'b'
+    }
+    if params['safesearch'] == 2: # STRICT
+        del query_dict['t']
+        query_dict['p'] = 1
+        query_dict.update({
+                'videxp': 'a',
+                'nadse': 'b',
+                'eclsexp': 'a',
+                'stiaexp': 'a',
+                'tjsexp': 'b',
+                'related': 'b',
+                'msnexp': 'a'
+            })
+    elif params['safesearch'] == 1: # MODERATE
+        query_dict['ex'] = -1
+        query_dict.update({
+                'nadse': 'b',
+                'eclsexp': 'b',
+                'tjsexp': 'b'
+        })
+    else: # OFF
+        query_dict['ex'] = -2
+        query_dict.update({
+                'nadse': 'b',
+                'eclsexp': 'b',
+                'tjsexp': 'b'
+        })
 
     params['allow_redirects'] = False
+    params["data"] = query_dict
+    params["url"] = url
+    logger.debug(params)
     return params
 
 
@@ -101,22 +128,29 @@ def response(resp):
     results = []
 
     doc = fromstring(resp.text)
-
-    titles = eval_xpath(doc, title_xpath)
-    contents = eval_xpath(doc, content_xpath)
-    urls = eval_xpath(doc, url_xpath)
-
-    for title, content, url in zip(titles, contents, urls):
-        results.append({'title': extract_text(title),
-                        'content': extract_text(content),
-                        'url': url})
+    data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
+    search_data = loads(data[0].replace('/\t/g', '    '))
+
+    if len(search_data) == 1 and ('n' not in search_data[0]):
+        only_result = search_data[0]
+        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
+            return
+    
+
+    for search_result in search_data:
+        if 'n' in search_result:
+            continue
+        results.append({'title': search_result.get("t"),
+                        'content': extract_text(search_result.get('a')),
+                        'url': search_result.get('u')})
 
     # parse correction
-    for correction in eval_xpath(doc, correction_xpath):
-        # append correction
-        results.append({'correction': extract_text(correction)})
+    # for correction in eval_xpath(doc, correction_xpath):
+    #     # append correction
+    #     results.append({'correction': extract_text(correction)})
 
     # return results
+    logger.debug(results)
     return results
 
 
-- 
GitLab


From 889e56235aa21c61b4d709ae9f701f1b64cd412b Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Tue, 15 Feb 2022 01:14:01 +0530
Subject: [PATCH 06/20] remove debug logs

---
 searx/engines/duckduckgo.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 80d18c80d..f52edf22a 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -8,6 +8,7 @@ from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
 from searx import logger
 import re
+import httpx
 
 logger = logger.getChild('ddg engine')
 # about
@@ -26,7 +27,7 @@ paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 safesearch = True
-VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/";
+VQD_REGEX = r"vqd='(\d+-\d+-\d+)'";
 language_aliases = {
     'ar-SA': 'ar-XA',
     'es-419': 'es-XL',
@@ -57,8 +58,10 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
-# def get_vqd(query):
-#     resp = requests.get
+def get_vqd(query):
+    resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
+    resp = re.findall(VQD_REGEX, resp.text)
+    return resp[0]
 
 def request(query, params):
     if params['time_range'] is not None and params['time_range'] not in time_range_dict:
@@ -66,8 +69,7 @@ def request(query, params):
 
     params['method'] = 'GET'
 
-    logger.debug(params)
-
+    vqd = get_vqd(query)
     query_dict = {
         "q": query,
         't': 'D',
@@ -78,7 +80,7 @@ def request(query, params):
         'ct': 'US',
         'ss_mkt': get_region_code(params["language"]),
         'df': params['time_range'],
-        'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
+        'vqd' : vqd,
         'ex': -2,
         'sp': '1',
         'bpa': '1',
@@ -115,7 +117,6 @@ def request(query, params):
     params['allow_redirects'] = False
     params["data"] = query_dict
     params["url"] = url
-    logger.debug(params)
     return params
 
 
@@ -149,8 +150,6 @@ def response(resp):
     #     # append correction
     #     results.append({'correction': extract_text(correction)})
 
-    # return results
-    logger.debug(results)
     return results
 
 
-- 
GitLab


From 0867163a4603a1ae3a5c4018712d3d022226760a Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Thu, 23 Dec 2021 16:41:53 -0300
Subject: [PATCH 07/20] Implementing ddg main search engine

---
 searx/engines/duckduckgo.py      | 140 +++++++----------------
 searx/engines/duckduckgo_lite.py | 186 +++++++++++++++++++++++++++++++
 searx/settings.yml               |   4 +
 3 files changed, 230 insertions(+), 100 deletions(-)
 create mode 100644 searx/engines/duckduckgo_lite.py

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index d871a629f..c8653e916 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,24 +1,16 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# lint: pylint
-"""DuckDuckGo Lite
 """
-
-from json import loads
+ DuckDuckGo (Web)
+"""
 
 from lxml.html import fromstring
-
-from searx.utils import (
-    dict_subset,
-    eval_xpath,
-    eval_xpath_getindex,
-    extract_text,
-    match_language,
-)
+from json import loads
+from searx.utils import extract_text, match_language, eval_xpath, dict_subset
 from searx.network import get
 
 # about
 about = {
-    "website": 'https://lite.duckduckgo.com/lite',
+    "website": 'https://duckduckgo.com/',
     "wikidata_id": 'Q12805',
     "official_api_documentation": 'https://duckduckgo.com/api',
     "use_official_api": False,
@@ -28,8 +20,8 @@ about = {
 
 # engine dependent config
 categories = ['general']
-paging = True
-supported_languages_url = 'https://duckduckgo.com/util/u588.js'
+paging = False
+supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 
 language_aliases = {
@@ -42,16 +34,20 @@ language_aliases = {
     'zh-HK': 'tzh-HK'
 }
 
-time_range_dict = {
-    'day': 'd',
-    'week': 'w',
-    'month': 'm',
-    'year': 'y'
-}
-
 # search-url
-url = 'https://lite.duckduckgo.com/lite'
-url_ping = 'https://duckduckgo.com/t/sl_l'
+url = 'https://html.duckduckgo.com/html/?q={}'
+url_ping = 'https://duckduckgo.com/t/sl_h'
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm',
+                   'year': 'y'}
+
+# specific xpath variables
+result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
+url_xpath = '//a[@class="result__snippet"]/@href'
+title_xpath = '//a[@class="result__a"]'
+content_xpath = '//a[@class="result__snippet"]'
+correction_xpath = '//a[@id="js-spelling-recourse-link"]'
 
 
 # match query's language to a region code that duckduckgo will accept
@@ -67,108 +63,52 @@ def get_region_code(lang, lang_list=None):
 
 
 def request(query, params):
+    if params['time_range'] is not None and params['time_range'] not in time_range_dict:
+        return params
 
-    params['url'] = url
+    params['url'] = url.format(query)
     params['method'] = 'POST'
-
     params['data']['q'] = query
-
-    # The API is not documented, so we do some reverse engineering and emulate
-    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
-    # link again and again ..
-
-    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
-
-    # initial page does not have an offset
-    if params['pageno'] == 2:
-        # second page does have an offset of 30
-        offset = (params['pageno'] - 1) * 30
-        params['data']['s'] = offset
-        params['data']['dc'] = offset + 1
-
-    elif params['pageno'] > 2:
-        # third and following pages do have an offset of 30 + n*50
-        offset = 30 + (params['pageno'] - 2) * 50
-        params['data']['s'] = offset
-        params['data']['dc'] = offset + 1
-
-    # initial page does not have additional data in the input form
-    if params['pageno'] > 1:
-        # request the second page (and more pages) needs 'o' and 'api' arguments
-        params['data']['o'] = 'json'
-        params['data']['api'] = 'd.js'
-
-    # initial page does not have additional data in the input form
-    if params['pageno'] > 2:
-        # request the third page (and more pages) some more arguments
-        params['data']['nextParams'] = ''
-        params['data']['v'] = ''
-        params['data']['vqd'] = ''
+    params['data']['b'] = ''
 
     region_code = get_region_code(params['language'], supported_languages)
     if region_code:
         params['data']['kl'] = region_code
         params['cookies']['kl'] = region_code
 
-    params['data']['df'] = ''
     if params['time_range'] in time_range_dict:
         params['data']['df'] = time_range_dict[params['time_range']]
-        params['cookies']['df'] = time_range_dict[params['time_range']]
 
+    params['allow_redirects'] = False
     return params
 
 
 # get response from search-request
 def response(resp):
-
-    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
-    get(url_ping, headers=headers_ping)
-
     if resp.status_code == 303:
         return []
 
+    # parse the response
     results = []
     doc = fromstring(resp.text)
+    
+    titles = eval_xpath(doc, title_xpath)
+    contents = eval_xpath(doc, content_xpath)
+    urls  = eval_xpath(doc, url_xpath)
 
-    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
-    if not len(result_table) >= 3:
-        # no more results
-        return []
-    result_table = result_table[2]
-
-    tr_rows = eval_xpath(result_table, './/tr')
-
-    # In the last <tr> is the form of the 'previous/next page' links
-    tr_rows = tr_rows[:-1]
-
-    len_tr_rows = len(tr_rows)
-    offset = 0
-
-    while len_tr_rows >= offset + 4:
-
-        # assemble table rows we need to scrap
-        tr_title = tr_rows[offset]
-        tr_content = tr_rows[offset + 1]
-        offset += 4
-
-        # ignore sponsored Adds <tr class="result-sponsored">
-        if tr_content.get('class') == 'result-sponsored':
-            continue
-
-        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
-        if a_tag is None:
-            continue
+    for title, content, url in zip(titles, contents, urls):
+        print(extract_text(content))
 
-        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
-        if td_content is None:
-            continue
+        results.append({'title': extract_text(title),
+                        'content': extract_text(content),
+                        'url': url})
 
-        results.append({
-            'title': a_tag.text_content(),
-            'content': extract_text(td_content),
-            'url': a_tag.get('href'),
-        })
+    # parse correction
+    for correction in eval_xpath(doc, correction_xpath):
+        # append correction
+        results.append({'correction': extract_text(correction)})
 
+    # return results
     return results
 
 
diff --git a/searx/engines/duckduckgo_lite.py b/searx/engines/duckduckgo_lite.py
new file mode 100644
index 000000000..d871a629f
--- /dev/null
+++ b/searx/engines/duckduckgo_lite.py
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""DuckDuckGo Lite
+"""
+
+from json import loads
+
+from lxml.html import fromstring
+
+from searx.utils import (
+    dict_subset,
+    eval_xpath,
+    eval_xpath_getindex,
+    extract_text,
+    match_language,
+)
+from searx.network import get
+
+# about
+about = {
+    "website": 'https://lite.duckduckgo.com/lite',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+# engine dependent config
+categories = ['general']
+paging = True
+supported_languages_url = 'https://duckduckgo.com/util/u588.js'
+time_range_support = True
+
+language_aliases = {
+    'ar-SA': 'ar-XA',
+    'es-419': 'es-XL',
+    'ja': 'jp-JP',
+    'ko': 'kr-KR',
+    'sl-SI': 'sl-SL',
+    'zh-TW': 'tzh-TW',
+    'zh-HK': 'tzh-HK'
+}
+
+time_range_dict = {
+    'day': 'd',
+    'week': 'w',
+    'month': 'm',
+    'year': 'y'
+}
+
+# search-url
+url = 'https://lite.duckduckgo.com/lite'
+url_ping = 'https://duckduckgo.com/t/sl_l'
+
+
+# match query's language to a region code that duckduckgo will accept
+def get_region_code(lang, lang_list=None):
+    if lang == 'all':
+        return None
+
+    lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')
+    lang_parts = lang_code.split('-')
+
+    # country code goes first
+    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
+
+
+def request(query, params):
+
+    params['url'] = url
+    params['method'] = 'POST'
+
+    params['data']['q'] = query
+
+    # The API is not documented, so we do some reverse engineering and emulate
+    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
+    # link again and again ..
+
+    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
+
+    # initial page does not have an offset
+    if params['pageno'] == 2:
+        # second page does have an offset of 30
+        offset = (params['pageno'] - 1) * 30
+        params['data']['s'] = offset
+        params['data']['dc'] = offset + 1
+
+    elif params['pageno'] > 2:
+        # third and following pages do have an offset of 30 + n*50
+        offset = 30 + (params['pageno'] - 2) * 50
+        params['data']['s'] = offset
+        params['data']['dc'] = offset + 1
+
+    # initial page does not have additional data in the input form
+    if params['pageno'] > 1:
+        # request the second page (and more pages) needs 'o' and 'api' arguments
+        params['data']['o'] = 'json'
+        params['data']['api'] = 'd.js'
+
+    # initial page does not have additional data in the input form
+    if params['pageno'] > 2:
+        # request the third page (and more pages) some more arguments
+        params['data']['nextParams'] = ''
+        params['data']['v'] = ''
+        params['data']['vqd'] = ''
+
+    region_code = get_region_code(params['language'], supported_languages)
+    if region_code:
+        params['data']['kl'] = region_code
+        params['cookies']['kl'] = region_code
+
+    params['data']['df'] = ''
+    if params['time_range'] in time_range_dict:
+        params['data']['df'] = time_range_dict[params['time_range']]
+        params['cookies']['df'] = time_range_dict[params['time_range']]
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+
+    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
+    get(url_ping, headers=headers_ping)
+
+    if resp.status_code == 303:
+        return []
+
+    results = []
+    doc = fromstring(resp.text)
+
+    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
+    if not len(result_table) >= 3:
+        # no more results
+        return []
+    result_table = result_table[2]
+
+    tr_rows = eval_xpath(result_table, './/tr')
+
+    # In the last <tr> is the form of the 'previous/next page' links
+    tr_rows = tr_rows[:-1]
+
+    len_tr_rows = len(tr_rows)
+    offset = 0
+
+    while len_tr_rows >= offset + 4:
+
+        # assemble table rows we need to scrap
+        tr_title = tr_rows[offset]
+        tr_content = tr_rows[offset + 1]
+        offset += 4
+
+        # ignore sponsored Adds <tr class="result-sponsored">
+        if tr_content.get('class') == 'result-sponsored':
+            continue
+
+        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
+        if a_tag is None:
+            continue
+
+        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
+        if td_content is None:
+            continue
+
+        results.append({
+            'title': a_tag.text_content(),
+            'content': extract_text(td_content),
+            'url': a_tag.get('href'),
+        })
+
+    return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+    # response is a js file with regions as an embedded object
+    response_page = resp.text
+    response_page = response_page[response_page.find('regions:{') + 8:]
+    response_page = response_page[:response_page.find('}') + 1]
+
+    regions_json = loads(response_page)
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+    return list(supported_languages)
diff --git a/searx/settings.yml b/searx/settings.yml
index a869bba4f..e8fe6aa06 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -415,6 +415,10 @@ engines:
     engine : duckduckgo
     shortcut : ddg
 
+  - name : duckduckgo (lite)
+    engine : duckduckgo_lite
+    shortcut : ddgl
+
   - name : duckduckgo images
     engine : duckduckgo_images
     shortcut : ddi
-- 
GitLab


From 74624625e6e4d1c63c62c80caf527176718c85cd Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:12:48 -0300
Subject: [PATCH 08/20] Update ddg url and add safesearch

---
 searx/engines/duckduckgo.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index c8653e916..f93c7815f 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -23,6 +23,7 @@ categories = ['general']
 paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
+safesearch = True
 
 language_aliases = {
     'ar-SA': 'ar-XA',
@@ -35,7 +36,7 @@ language_aliases = {
 }
 
 # search-url
-url = 'https://html.duckduckgo.com/html/?q={}'
+url = 'https://duckduckgo.com/?q={}'
 url_ping = 'https://duckduckgo.com/t/sl_h'
 time_range_dict = {'day': 'd',
                    'week': 'w',
@@ -67,10 +68,19 @@ def request(query, params):
         return params
 
     params['url'] = url.format(query)
-    params['method'] = 'POST'
+    params['method'] = 'GET'
     params['data']['q'] = query
     params['data']['b'] = ''
 
+    safesearch_ddg_value = None
+    if params['safesearch'] == 0:
+        safesearch_ddg_value = -2 # OFF
+    if params['safesearch'] == 2:
+        safesearch_ddg_value = 1 # STRICT
+
+    if safesearch_ddg_value != None:
+        params['cookies']['p'] = str(safesearch_ddg_value)
+
     region_code = get_region_code(params['language'], supported_languages)
     if region_code:
         params['data']['kl'] = region_code
@@ -90,15 +100,14 @@ def response(resp):
 
     # parse the response
     results = []
+
     doc = fromstring(resp.text)
-    
+
     titles = eval_xpath(doc, title_xpath)
     contents = eval_xpath(doc, content_xpath)
-    urls  = eval_xpath(doc, url_xpath)
+    urls = eval_xpath(doc, url_xpath)
 
     for title, content, url in zip(titles, contents, urls):
-        print(extract_text(content))
-
         results.append({'title': extract_text(title),
                         'content': extract_text(content),
                         'url': url})
-- 
GitLab


From 0579cfb31f2313d6b20e193a1a2e97a044aa3902 Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:22:05 -0300
Subject: [PATCH 09/20] Fix small pep8 issues

---
 searx/engines/duckduckgo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index f93c7815f..a1055767f 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -74,11 +74,11 @@ def request(query, params):
 
     safesearch_ddg_value = None
     if params['safesearch'] == 0:
-        safesearch_ddg_value = -2 # OFF
+        safesearch_ddg_value = -2  # OFF
     if params['safesearch'] == 2:
-        safesearch_ddg_value = 1 # STRICT
+        safesearch_ddg_value = 1  # STRICT
 
-    if safesearch_ddg_value != None:
+    if safesearch_ddg_value is not None:
         params['cookies']['p'] = str(safesearch_ddg_value)
 
     region_code = get_region_code(params['language'], supported_languages)
-- 
GitLab


From c70198ae2971b6dbea0568726d7194fa971ca362 Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira <israelyago@e.email>
Date: Mon, 3 Jan 2022 15:45:33 -0300
Subject: [PATCH 10/20] Remove unused imports

---
 searx/engines/duckduckgo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index a1055767f..e2b8ea4c8 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -5,8 +5,7 @@
 
 from lxml.html import fromstring
 from json import loads
-from searx.utils import extract_text, match_language, eval_xpath, dict_subset
-from searx.network import get
+from searx.utils import extract_text, match_language, eval_xpath
 
 # about
 about = {
-- 
GitLab


From b562bce5fbe1619fa5da45d0304820ddf779df6d Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Fri, 11 Feb 2022 01:35:33 +0530
Subject: [PATCH 11/20] initial version of safe search for ddg engine

---
 searx/engines/duckduckgo.py | 116 +++++++++++++++++++++++-------------
 1 file changed, 75 insertions(+), 41 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index e2b8ea4c8..80d18c80d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -6,7 +6,10 @@
 from lxml.html import fromstring
 from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
+from searx import logger
+import re
 
+logger = logger.getChild('ddg engine')
 # about
 about = {
     "website": 'https://duckduckgo.com/',
@@ -23,7 +26,7 @@ paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 safesearch = True
-
+VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/";
 language_aliases = {
     'ar-SA': 'ar-XA',
     'es-419': 'es-XL',
@@ -35,21 +38,14 @@ language_aliases = {
 }
 
 # search-url
-url = 'https://duckduckgo.com/?q={}'
+url = 'https://links.duckduckgo.com/d.js?'
+
 url_ping = 'https://duckduckgo.com/t/sl_h'
 time_range_dict = {'day': 'd',
                    'week': 'w',
                    'month': 'm',
                    'year': 'y'}
 
-# specific xpath variables
-result_xpath = '//div[@class="links_main links_deep result__body"]'  # noqa
-url_xpath = '//a[@class="result__snippet"]/@href'
-title_xpath = '//a[@class="result__a"]'
-content_xpath = '//a[@class="result__snippet"]'
-correction_xpath = '//a[@id="js-spelling-recourse-link"]'
-
-
 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=None):
     if lang == 'all':
@@ -61,34 +57,65 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
+# def get_vqd(query):
+#     resp = requests.get
 
 def request(query, params):
     if params['time_range'] is not None and params['time_range'] not in time_range_dict:
         return params
 
-    params['url'] = url.format(query)
     params['method'] = 'GET'
-    params['data']['q'] = query
-    params['data']['b'] = ''
-
-    safesearch_ddg_value = None
-    if params['safesearch'] == 0:
-        safesearch_ddg_value = -2  # OFF
-    if params['safesearch'] == 2:
-        safesearch_ddg_value = 1  # STRICT
 
-    if safesearch_ddg_value is not None:
-        params['cookies']['p'] = str(safesearch_ddg_value)
-
-    region_code = get_region_code(params['language'], supported_languages)
-    if region_code:
-        params['data']['kl'] = region_code
-        params['cookies']['kl'] = region_code
-
-    if params['time_range'] in time_range_dict:
-        params['data']['df'] = time_range_dict[params['time_range']]
+    logger.debug(params)
+
+    query_dict = {
+        "q": query,
+        't': 'D',
+        'l': params["language"],
+        'kl': get_region_code(params["language"]),
+        's': 0, # TODO
+        'dl': 'en',
+        'ct': 'US',
+        'ss_mkt': get_region_code(params["language"]),
+        'df': params['time_range'],
+        'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
+        'ex': -2,
+        'sp': '1',
+        'bpa': '1',
+        'biaexp': 'b',
+        'msvrtexp': 'b'
+    }
+    if params['safesearch'] == 2: # STRICT
+        del query_dict['t']
+        query_dict['p'] = 1
+        query_dict.update({
+                'videxp': 'a',
+                'nadse': 'b',
+                'eclsexp': 'a',
+                'stiaexp': 'a',
+                'tjsexp': 'b',
+                'related': 'b',
+                'msnexp': 'a'
+            })
+    elif params['safesearch'] == 1: # MODERATE
+        query_dict['ex'] = -1
+        query_dict.update({
+                'nadse': 'b',
+                'eclsexp': 'b',
+                'tjsexp': 'b'
+        })
+    else: # OFF
+        query_dict['ex'] = -2
+        query_dict.update({
+                'nadse': 'b',
+                'eclsexp': 'b',
+                'tjsexp': 'b'
+        })
 
     params['allow_redirects'] = False
+    params["data"] = query_dict
+    params["url"] = url
+    logger.debug(params)
     return params
 
 
@@ -101,22 +128,29 @@ def response(resp):
     results = []
 
     doc = fromstring(resp.text)
-
-    titles = eval_xpath(doc, title_xpath)
-    contents = eval_xpath(doc, content_xpath)
-    urls = eval_xpath(doc, url_xpath)
-
-    for title, content, url in zip(titles, contents, urls):
-        results.append({'title': extract_text(title),
-                        'content': extract_text(content),
-                        'url': url})
+    data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
+    search_data = loads(data[0].replace('/\t/g', '    '))
+
+    if len(search_data) == 1 and ('n' not in search_data[0]):
+        only_result = search_data[0]
+        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
+            return
+    
+
+    for search_result in search_data:
+        if 'n' in search_result:
+            continue
+        results.append({'title': search_result.get("t"),
+                        'content': extract_text(search_result.get('a')),
+                        'url': search_result.get('u')})
 
     # parse correction
-    for correction in eval_xpath(doc, correction_xpath):
-        # append correction
-        results.append({'correction': extract_text(correction)})
+    # for correction in eval_xpath(doc, correction_xpath):
+    #     # append correction
+    #     results.append({'correction': extract_text(correction)})
 
     # return results
+    logger.debug(results)
     return results
 
 
-- 
GitLab


From c7d28ead4bde5293720a1d8f7749347a9df95b6e Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Tue, 15 Feb 2022 01:14:01 +0530
Subject: [PATCH 12/20] remove debug logs

---
 searx/engines/duckduckgo.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 80d18c80d..f52edf22a 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -8,6 +8,7 @@ from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
 from searx import logger
 import re
+import httpx
 
 logger = logger.getChild('ddg engine')
 # about
@@ -26,7 +27,7 @@ paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 safesearch = True
-VQD_REGEX = r"vqd='(\d+-\d+-\d+)'/";
+VQD_REGEX = r"vqd='(\d+-\d+-\d+)'";
 language_aliases = {
     'ar-SA': 'ar-XA',
     'es-419': 'es-XL',
@@ -57,8 +58,10 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
-# def get_vqd(query):
-#     resp = requests.get
+def get_vqd(query):
+    resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
+    resp = re.findall(VQD_REGEX, resp.text)
+    return resp[0]
 
 def request(query, params):
     if params['time_range'] is not None and params['time_range'] not in time_range_dict:
@@ -66,8 +69,7 @@ def request(query, params):
 
     params['method'] = 'GET'
 
-    logger.debug(params)
-
+    vqd = get_vqd(query)
     query_dict = {
         "q": query,
         't': 'D',
@@ -78,7 +80,7 @@ def request(query, params):
         'ct': 'US',
         'ss_mkt': get_region_code(params["language"]),
         'df': params['time_range'],
-        'vqd' : "3-126340648549743517691069464246778236175-203846832012815914858366468471688211061",
+        'vqd' : vqd,
         'ex': -2,
         'sp': '1',
         'bpa': '1',
@@ -115,7 +117,6 @@ def request(query, params):
     params['allow_redirects'] = False
     params["data"] = query_dict
     params["url"] = url
-    logger.debug(params)
     return params
 
 
@@ -149,8 +150,6 @@ def response(resp):
     #     # append correction
     #     results.append({'correction': extract_text(correction)})
 
-    # return results
-    logger.debug(results)
     return results
 
 
-- 
GitLab


From 085f08bd2b6ba634cec05386750b7ff971783dfb Mon Sep 17 00:00:00 2001
From: Nivesh <nivesh@e.email>
Date: Mon, 28 Feb 2022 15:40:30 +0530
Subject: [PATCH 13/20] fix pep8

---
 searx/engines/duckduckgo.py | 57 +++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index f52edf22a..4a109d2c8 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -3,9 +3,8 @@
  DuckDuckGo (Web)
 """
 
-from lxml.html import fromstring
 from json import loads
-from searx.utils import extract_text, match_language, eval_xpath
+from searx.utils import extract_text, match_language
 from searx import logger
 import re
 import httpx
@@ -27,7 +26,7 @@ paging = False
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 safesearch = True
-VQD_REGEX = r"vqd='(\d+-\d+-\d+)'";
+VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"
 language_aliases = {
     'ar-SA': 'ar-XA',
     'es-419': 'es-XL',
@@ -47,6 +46,7 @@ time_range_dict = {'day': 'd',
                    'month': 'm',
                    'year': 'y'}
 
+
 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=None):
     if lang == 'all':
@@ -58,11 +58,13 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
+
 def get_vqd(query):
     resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
     resp = re.findall(VQD_REGEX, resp.text)
     return resp[0]
 
+
 def request(query, params):
     if params['time_range'] is not None and params['time_range'] not in time_range_dict:
         return params
@@ -75,43 +77,43 @@ def request(query, params):
         't': 'D',
         'l': params["language"],
         'kl': get_region_code(params["language"]),
-        's': 0, # TODO
+        's': 0,
         'dl': 'en',
         'ct': 'US',
         'ss_mkt': get_region_code(params["language"]),
         'df': params['time_range'],
-        'vqd' : vqd,
+        'vqd': vqd,
         'ex': -2,
         'sp': '1',
         'bpa': '1',
         'biaexp': 'b',
         'msvrtexp': 'b'
     }
-    if params['safesearch'] == 2: # STRICT
+    if params['safesearch'] == 2:  # STRICT
         del query_dict['t']
         query_dict['p'] = 1
         query_dict.update({
-                'videxp': 'a',
-                'nadse': 'b',
-                'eclsexp': 'a',
-                'stiaexp': 'a',
-                'tjsexp': 'b',
-                'related': 'b',
-                'msnexp': 'a'
-            })
-    elif params['safesearch'] == 1: # MODERATE
+            'videxp': 'a',
+            'nadse': 'b',
+            'eclsexp': 'a',
+            'stiaexp': 'a',
+            'tjsexp': 'b',
+            'related': 'b',
+            'msnexp': 'a'
+        })
+    elif params['safesearch'] == 1:  # MODERATE
         query_dict['ex'] = -1
         query_dict.update({
-                'nadse': 'b',
-                'eclsexp': 'b',
-                'tjsexp': 'b'
+            'nadse': 'b',
+            'eclsexp': 'b',
+            'tjsexp': 'b'
         })
-    else: # OFF
+    else:  # OFF
         query_dict['ex'] = -2
         query_dict.update({
-                'nadse': 'b',
-                'eclsexp': 'b',
-                'tjsexp': 'b'
+            'nadse': 'b',
+            'eclsexp': 'b',
+            'tjsexp': 'b'
         })
 
     params['allow_redirects'] = False
@@ -128,15 +130,14 @@ def response(resp):
     # parse the response
     results = []
 
-    doc = fromstring(resp.text)
     data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
     search_data = loads(data[0].replace('/\t/g', '    '))
 
     if len(search_data) == 1 and ('n' not in search_data[0]):
         only_result = search_data[0]
-        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
+        if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or
+                only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
             return
-    
 
     for search_result in search_data:
         if 'n' in search_result:
@@ -144,12 +145,6 @@ def response(resp):
         results.append({'title': search_result.get("t"),
                         'content': extract_text(search_result.get('a')),
                         'url': search_result.get('u')})
-
-    # parse correction
-    # for correction in eval_xpath(doc, correction_xpath):
-    #     # append correction
-    #     results.append({'correction': extract_text(correction)})
-
     return results
 
 
-- 
GitLab


From 594403440528040d89fccba5cbf0e851472ac7cf Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Thu, 17 Mar 2022 18:40:26 +0530
Subject: [PATCH 14/20] fix paging and offset in ddg engine

---
 searx/engines/duckduckgo.py |  5 +++--
 searx/engines/jstest.js     | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 searx/engines/jstest.js

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 70f1a8f65..876959dcd 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -22,8 +22,9 @@ about = {
 
 # engine dependent config
 categories = ['general']
-paging = False
+paging = True
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
+number_of_results = 10
 time_range_support = True
 safesearch = True
 VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"
@@ -86,7 +87,7 @@ def request(query, params):
         't': 'D',
         'l': params["language"],
         'kl': get_region_code(params["language"]),
-        's': 0,
+        's': (params['pageno'] - 1) * number_of_results,
         'dl': 'en',
         'ct': 'US',
         'ss_mkt': get_region_code(params["language"]),
diff --git a/searx/engines/jstest.js b/searx/engines/jstest.js
new file mode 100644
index 000000000..46fccaf2b
--- /dev/null
+++ b/searx/engines/jstest.js
@@ -0,0 +1,24 @@
+const DDG = require('duck-duck-scrape');
+// let x = DDG.search('fuck', {
+//   safeSearch: DDG.SafeSearchType.STRICT
+// }).then((data) => {
+//     data["results"].forEach(element => {
+//       console.log(element.title, 'STRICT')
+//     });
+// });
+
+// let y = DDG.search('fuck', {
+//   safeSearch: DDG.SafeSearchType.MODERATE
+// }).then((data) => {
+//   data["results"].forEach(element => {
+//     console.log(element.title, 'MODERATE')
+//   });
+// });
+
+let z = DDG.search('fuck', {
+  safeSearch: DDG.SafeSearchType.OFF
+}).then((data) => {
+  data["results"].forEach(element => {
+    console.log(element.title, 'OFF')
+  });
+});
-- 
GitLab


From f2fc86f49ba23799ade566964cd050e9a7bc80be Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Thu, 17 Mar 2022 18:40:51 +0530
Subject: [PATCH 15/20] remove test file

---
 searx/engines/jstest.js | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 searx/engines/jstest.js

diff --git a/searx/engines/jstest.js b/searx/engines/jstest.js
deleted file mode 100644
index 46fccaf2b..000000000
--- a/searx/engines/jstest.js
+++ /dev/null
@@ -1,24 +0,0 @@
-const DDG = require('duck-duck-scrape');
-// let x = DDG.search('fuck', {
-//   safeSearch: DDG.SafeSearchType.STRICT
-// }).then((data) => {
-//     data["results"].forEach(element => {
-//       console.log(element.title, 'STRICT')
-//     });
-// });
-
-// let y = DDG.search('fuck', {
-//   safeSearch: DDG.SafeSearchType.MODERATE
-// }).then((data) => {
-//   data["results"].forEach(element => {
-//     console.log(element.title, 'MODERATE')
-//   });
-// });
-
-let z = DDG.search('fuck', {
-  safeSearch: DDG.SafeSearchType.OFF
-}).then((data) => {
-  data["results"].forEach(element => {
-    console.log(element.title, 'OFF')
-  });
-});
-- 
GitLab


From ff1b14438485f7ce4a25fc7ac27edf2bc763a38e Mon Sep 17 00:00:00 2001
From: nivesh <nivesh@e.email>
Date: Thu, 17 Mar 2022 18:57:16 +0530
Subject: [PATCH 16/20] fix lint and pep issues

---
 searx/engines/duckduckgo.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 876959dcd..6b8f763c3 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -53,6 +53,7 @@ time_range_dict = {'day': 'd',
                    'month': 'm',
                    'year': 'y'}
 
+
 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=None):
     if lang == 'all':
@@ -64,10 +65,6 @@ def get_region_code(lang, lang_list=None):
     # country code goes first
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
-def get_vqd(query):
-    resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
-    resp = re.findall(VQD_REGEX, resp.text)
-    return resp[0]
 
 def get_vqd(query):
     resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
-- 
GitLab


From ce6700dfb53081d7d2156327d70220f447df489d Mon Sep 17 00:00:00 2001
From: Nivesh Krishna <nivesh@e.email>
Date: Wed, 23 Mar 2022 16:42:43 +0530
Subject: [PATCH 17/20] fix language selection in ddg engine

---
 searx/engines/duckduckgo.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 6b8f763c3..12cbae6ab 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -4,10 +4,11 @@
 """
 
 from json import loads
-from searx.utils import extract_text, match_language
+from urllib.parse import urlencode
+from searx.utils import match_language, HTMLTextExtractor
 from searx import logger
 import re
-import httpx
+from searx.network import get
 
 logger = logger.getChild('ddg engine')
 # about
@@ -66,8 +67,8 @@ def get_region_code(lang, lang_list=None):
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()
 
 
-def get_vqd(query):
-    resp = httpx.get(f"https://duckduckgo.com/?q={query}&ia=web")
+def get_vqd(query, headers):
+    resp = get(f"https://duckduckgo.com/?q={query}&ia=web", headers=headers)
     resp = re.findall(VQD_REGEX, resp.text)
     return resp[0]
 
@@ -78,16 +79,17 @@ def request(query, params):
 
     params['method'] = 'GET'
 
-    vqd = get_vqd(query)
+    vqd = get_vqd(query, params["headers"])
+    dl,ct = match_language(params["language"], supported_languages, language_aliases, 'wt-WT').split("-")
     query_dict = {
         "q": query,
         't': 'D',
         'l': params["language"],
-        'kl': get_region_code(params["language"]),
+        'kl': f"{ct}-{dl}",
         's': (params['pageno'] - 1) * number_of_results,
-        'dl': 'en',
-        'ct': 'US',
-        'ss_mkt': get_region_code(params["language"]),
+        'dl': dl,
+        'ct': ct,
+        'ss_mkt': get_region_code(params["language"], supported_languages),
         'df': params['time_range'],
         'vqd': vqd,
         'ex': -2,
@@ -125,7 +127,8 @@ def request(query, params):
 
     params['allow_redirects'] = False
     params["data"] = query_dict
-    params["url"] = url
+    params['cookies']['kl'] =  params["data"]["kl"]
+    params["url"] = url + urlencode(params["data"])
     return params
 
 
@@ -138,19 +141,24 @@ def response(resp):
     results = []
 
     data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
-    search_data = loads(data[0].replace('/\t/g', '    '))
+    try:
+        search_data = loads(data[0].replace('/\t/g', '    '))
+    except IndexError:
+        return
 
     if len(search_data) == 1 and ('n' not in search_data[0]):
         only_result = search_data[0]
         if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or
                 only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
             return
-
+    
     for search_result in search_data:
         if 'n' in search_result:
             continue
+        html2text = HTMLTextExtractor()
+        html2text.feed(search_result.get('a'))
         results.append({'title': search_result.get("t"),
-                        'content': extract_text(search_result.get('a')),
+                        'content': html2text.get_text(),
                         'url': search_result.get('u')})
     return results
 
-- 
GitLab


From 37e5148b535b97f686eb7b04d4559cd6b52c3f4c Mon Sep 17 00:00:00 2001
From: Nivesh Krishna <nivesh@e.email>
Date: Wed, 23 Mar 2022 16:48:27 +0530
Subject: [PATCH 18/20] fix time range in ddg engine

---
 searx/engines/duckduckgo.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 12cbae6ab..f6ec4385a 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -41,13 +41,6 @@ language_aliases = {
 
 # search-url
 url = 'https://links.duckduckgo.com/d.js?'
-
-url_ping = 'https://duckduckgo.com/t/sl_h'
-time_range_dict = {'day': 'd',
-                   'week': 'w',
-                   'month': 'm',
-                   'year': 'y'}
-
 url_ping = 'https://duckduckgo.com/t/sl_h'
 time_range_dict = {'day': 'd',
                    'week': 'w',
@@ -74,8 +67,6 @@ def get_vqd(query, headers):
 
 
 def request(query, params):
-    if params['time_range'] is not None and params['time_range'] not in time_range_dict:
-        return params
 
     params['method'] = 'GET'
 
@@ -128,6 +119,9 @@ def request(query, params):
     params['allow_redirects'] = False
     params["data"] = query_dict
     params['cookies']['kl'] =  params["data"]["kl"]
+    if params['time_range'] in time_range_dict:
+        params['data']['df'] = time_range_dict[params['time_range']]
+        params['cookies']['df'] = time_range_dict[params['time_range']]
     params["url"] = url + urlencode(params["data"])
     return params
 
-- 
GitLab


From 30665b6aed7d642aa292fd84c2441d4d4113dda2 Mon Sep 17 00:00:00 2001
From: Nivesh Krishna <nivesh@e.email>
Date: Wed, 23 Mar 2022 18:28:04 +0530
Subject: [PATCH 19/20] fix pep8 issues

---
 searx/engines/duckduckgo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index f6ec4385a..8878522f0 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -71,7 +71,7 @@ def request(query, params):
     params['method'] = 'GET'
 
     vqd = get_vqd(query, params["headers"])
-    dl,ct = match_language(params["language"], supported_languages, language_aliases, 'wt-WT').split("-")
+    dl, ct = match_language(params["language"], supported_languages, language_aliases, 'wt-WT').split("-")
     query_dict = {
         "q": query,
         't': 'D',
@@ -118,7 +118,7 @@ def request(query, params):
 
     params['allow_redirects'] = False
     params["data"] = query_dict
-    params['cookies']['kl'] =  params["data"]["kl"]
+    params['cookies']['kl'] = params["data"]["kl"]
     if params['time_range'] in time_range_dict:
         params['data']['df'] = time_range_dict[params['time_range']]
         params['cookies']['df'] = time_range_dict[params['time_range']]
@@ -145,7 +145,7 @@ def response(resp):
         if ((only_result.get("da") is not None and only_result.get("t") == 'EOF') or
                 only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
             return
-    
+
     for search_result in search_data:
         if 'n' in search_result:
             continue
-- 
GitLab


From 4faebacd7b92d5cb6b8acca636894a404da90d4e Mon Sep 17 00:00:00 2001
From: Nivesh Krishna <nivesh@e.email>
Date: Wed, 23 Mar 2022 19:50:51 +0530
Subject: [PATCH 20/20] remove ddg lite from default engines

---
 searx/settings.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/searx/settings.yml b/searx/settings.yml
index e8fe6aa06..255b41c7a 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -418,6 +418,7 @@ engines:
   - name : duckduckgo (lite)
     engine : duckduckgo_lite
     shortcut : ddgl
+    disabled : True
 
   - name : duckduckgo images
     engine : duckduckgo_images
-- 
GitLab