Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit bc590cbc authored by Noémi Ványi's avatar Noémi Ványi Committed by GitHub
Browse files

Merge pull request #2592 from dalf/update-external-bangs

[mod] add utils/fetch_external_bangs.py
parents 606aa79e 7c1847d5
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -194,7 +194,8 @@ PYLINT_FILES=\
	searx/engines/google_news.py \
	searx/engines/google_videos.py \
	searx/engines/google_images.py \
	searx/engines/mediathekviewweb.py
	searx/engines/mediathekviewweb.py \
	utils/fetch_external_bangs.py

test.pylint: pyenvinstall
	$(call cmd,pylint,$(PYLINT_FILES))
+2 −5
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@ import json
from pathlib import Path


__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', 'WIKIDATA_UNITS',
__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', 'WIKIDATA_UNITS', 'EXTERNAL_BANGS',
            'bangs_loader', 'ahmia_blacklist_loader']
data_dir = Path(__file__).parent

@@ -12,10 +12,6 @@ def load(filename):
        return json.load(fd)


def bangs_loader():
    """Return whatever ``load`` produces for the ``bangs.json`` data file."""
    bangs = load('bangs.json')
    return bangs


def ahmia_blacklist_loader():
    """Read ``ahmia_blacklist.txt`` from ``data_dir`` and return its entries.

    The file is decoded as UTF-8 and split on any whitespace, so the result
    is a list of the whitespace-separated tokens found in the file.
    """
    blacklist_path = str(data_dir / 'ahmia_blacklist.txt')
    with open(blacklist_path, encoding='utf-8') as fd:
        content = fd.read()
    return content.split()
@@ -26,3 +22,4 @@ CURRENCIES = load('currencies.json')
USER_AGENTS = load('useragents.json')
EXTERNAL_URLS = load('external_urls.json')
WIKIDATA_UNITS = load('wikidata_units.json')
EXTERNAL_BANGS = load('external_bangs.json')

searx/data/bangs.json

deleted100644 → 0
+0 −67913

File deleted.

Preview size limit exceeded, changes collapsed.

+19067 −0

File added.

Preview size limit exceeded, changes collapsed.

+76 −26
Original line number Diff line number Diff line
from searx.data import bangs_loader
# SPDX-License-Identifier: AGPL-3.0-or-later

# The bangs data comes from the first URL below and was converted to JSON
# with the online converter at the second URL:
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
# https://pseitz.github.io/toml-to-json-online-converter/
# NOTE: only use get_bang_url from this module.
from searx.data import EXTERNAL_BANGS

# Build a lookup table keyed by trigger: every trigger listed in a bang's
# "triggers" entry maps to a new dict holding that bang's other fields
# (everything except "triggers" itself).
bangs_data = {}
for bang in bangs_loader()['bang']:
    for trigger in bang["triggers"]:
        bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}

def get_node(external_bangs_db, bang):
    """Walk the bang trie as far as the letters of ``bang`` allow.

    Letters are accumulated until the accumulated chunk is a key of the
    current dict node; the walk then descends into that child and starts a
    new chunk.

    :param external_bangs_db: mapping whose ``'trie'`` entry is the trie root.
    :param bang: the bang text to look up, consumed letter by letter.
    :return: tuple ``(node, before, after)`` where ``node`` is the deepest
        node reached, ``before`` is the part of ``bang`` that was matched
        and ``after`` is the trailing part that matched no key.
    """
    current = external_bangs_db['trie']
    matched = ''
    pending = ''
    for letter in bang:
        pending = pending + letter
        # Same evaluation order as a plain "in ... and isinstance(...)" test.
        if pending not in current or not isinstance(current, dict):
            continue
        current = current[pending]
        matched = matched + pending
        pending = ''
    return current, matched, pending

def get_bang_url(search_query):

def get_bang_definition_and_ac(external_bangs_db, bang):
    """Return the definition of ``bang`` and its direct autocompletions.

    :param external_bangs_db: bangs database, passed through to ``get_node``.
    :param bang: the bang text typed by the user.
    :return: tuple ``(bang_definition, bang_ac_list)``. ``bang_definition``
        is the raw definition string when ``bang`` matches exactly, else
        ``None``. ``bang_ac_list`` holds the longer bangs reachable from the
        node that was reached (one trie level only).
    """
    node, before, after = get_node(external_bangs_db, bang)

    if after != '':
        # Some letters matched no key, so there is no exact definition.
        # Only a dict node has child keys to offer; a string leaf must not
        # be iterated, since that would walk the characters of the
        # definition and produce completions that do not exist.
        if not isinstance(node, dict):
            return None, []
        return None, [before + k for k in node if k.startswith(after)]

    if isinstance(node, dict):
        # The '*' key stores the definition of the node itself; every other
        # key is a continuation of the bang.
        return node.get('*'), [before + k for k in node if k != '*']

    if isinstance(node, str):
        # A string node is a leaf: the definition, with nothing below it.
        return node, []

    return None, []


def resolve_bang_definition(bang_definition, query):
    """Turn a raw bang definition into a ``(url, rank)`` tuple.

    The definition is a URL template and a rank joined by ``chr(1)``; inside
    the template, ``chr(2)`` marks where the query is inserted.

    :param bang_definition: string of the form
        ``<url template> + chr(1) + <rank>``; the rank part may be empty.
    :param query: the user's search terms. They are percent-encoded before
        being inserted, so characters such as ``&``, ``#`` or spaces cannot
        alter the structure of the resulting URL.
    :return: tuple ``(url, rank)``; ``rank`` is ``0`` when the definition
        carries no rank.
    :raises ValueError: if the definition does not split into exactly two
        parts on ``chr(1)``, or if the rank is not an integer.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    url, rank = bang_definition.split(chr(1))
    # Protocol-relative templates default to https.
    if url.startswith('//'):
        url = 'https:' + url
    url = url.replace(chr(2), quote_plus(query))
    return (url, int(rank) if rank else 0)


def get_bang_definition_and_autocomplete(bang, external_bangs_db=None):
    """Return the definition of ``bang`` and a ranked list of completions.

    Starting from the direct completions of ``bang``, every reachable longer
    bang is visited breadth-first; those that carry a definition are kept
    together with their rank.

    :param bang: the bang text typed by the user.
    :param external_bangs_db: bangs database to use; ``EXTERNAL_BANGS`` when
        ``None``.
    :return: tuple ``(bang_definition, autocomplete)`` where ``autocomplete``
        lists the defined bangs ordered by descending rank, then by name.
    """
    db = EXTERNAL_BANGS if external_bangs_db is None else external_bangs_db

    bang_definition, first_level = get_bang_definition_and_ac(db, bang)

    ranked = []
    queue = list(first_level)
    visited = set()
    while queue:
        candidate = queue.pop(0)
        visited.add(candidate)

        definition, children = get_bang_definition_and_ac(db, candidate)
        if definition:
            # Only the rank is needed here, so resolve with an empty query.
            ranked.append((candidate, resolve_bang_definition(definition, '')[1]))
        for child in children:
            if child in visited or child in queue:
                continue
            queue.append(child)

    ordered = sorted(ranked, key=lambda item: (-item[1], item[0]))
    return bang_definition, [name for name, _ in ordered]


def get_bang_url(search_query, external_bangs_db=None):
    """
    Redirects if the user supplied a correct bang search.
    :param search_query: This is a search_query object which contains preferences and the submitted queries.
    :return: None if the bang was invalid, else a string of the redirect url.
    """
    global EXTERNAL_BANGS
    if external_bangs_db is None:
        external_bangs_db = EXTERNAL_BANGS

    if search_query.external_bang:
        query = search_query.query
        bang = _get_bang(search_query.external_bang)

        if bang and query:
            # TODO add region support.
            bang_url = bang["regions"]["default"]
        bang_definition, _ = get_bang_definition_and_ac(external_bangs_db, search_query.external_bang)
        return resolve_bang_definition(bang_definition, search_query.query)[0] if bang_definition else None

            return bang_url.replace("{{{term}}}", query)
    return None


def _get_bang(user_bang):
    """Look up ``user_bang`` in the module-level ``bangs_data`` table.

    :param user_bang: the parsed user bang, for example ``yt``.
    :return: the entry stored in ``bangs_data`` for that bang, or ``None``
        when the bang is not present.
    """
    found = bangs_data.get(user_bang, None)
    return found
Loading