Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 772c048d authored by Marc Abonce Seguin's avatar Marc Abonce Seguin
Browse files

refactor engine's search language handling

Add a match_language function in utils to match any user-given
language code against the list of an engine's supported languages.

Also add a language_aliases dict to each engine to translate
standard language codes into the custom codes used by that engine.
parent d1eae935
Loading
Loading
Loading
Loading
+1 −1

File changed.

Preview size limit exceeded, changes collapsed.

+20 −1
Original line number Diff line number Diff line
@@ -20,13 +20,14 @@ import sys
import threading
from os.path import realpath, dirname
from io import open
from babel.localedata import locale_identifiers
from flask_babel import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
from searx.utils import load_module, match_language


logger = logger.getChild('engines')
@@ -38,6 +39,8 @@ engines = {}
categories = {'general': []}

languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]

engine_shortcuts = {}
engine_default_args = {'paging': False,
@@ -97,6 +100,22 @@ def load_engine(engine_data):
    if engine_data['name'] in languages:
        setattr(engine, 'supported_languages', languages[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        if language_aliases:
            setattr(engine, 'language_aliases', language_aliases)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
+2 −2
Original line number Diff line number Diff line
@@ -99,13 +99,13 @@ supported_languages = dict(lang_urls, **main_langs)

# do search-request
def request(query, params):
    # translate the locale (e.g. 'en_US') to language code ('en')
    # translate the locale (e.g. 'en-US') to language code ('en')
    language = locale_to_lang_code(params['language'])

    # if our language is hosted on the main site, we need to add its name
    # to the query in order to narrow the results to that language
    if language in main_langs:
        query += '(' + main_langs[language] + ')'
        query += b' (' + main_langs[language] + b')'

    # prepare the request parameters
    query = urlencode({'search': query})
+4 −2
Original line number Diff line number Diff line
@@ -16,12 +16,14 @@
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import match_language

# engine dependent config
categories = ['general']
paging = True
language_support = True
supported_languages_url = 'https://www.bing.com/account/general'
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}

# search-url
base_url = 'https://www.bing.com/'
@@ -32,9 +34,9 @@ search_string = 'search?{query}&first={offset}'
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    lang = params['language'].split('-')[0].upper()
    lang = match_language(params['language'], supported_languages, language_aliases)

    query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8')
    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')

    search_path = search_string.format(
        query=urlencode({'q': query}),
+2 −21
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ from lxml import html
from json import loads
import re
from searx.url_utils import urlencode
from searx.utils import match_language

# engine dependent config
categories = ['images']
@@ -46,26 +47,6 @@ safesearch_types = {2: 'STRICT',
_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)


# get supported region code
def get_region_code(lang, lang_list=None):
    """Return the supported region code (lowercased) matching *lang*.

    Looks *lang* up in *lang_list* (or the module-level
    ``supported_languages`` when *lang_list* is falsy).  Any code
    starting with 'no' maps to Norwegian Bokmål ('nb-no').  Otherwise
    the first region sharing the same language prefix is used, and
    'en-us' is the final fallback.
    """
    regions = lang_list or supported_languages

    if lang in regions:
        return lang.lower()

    if lang.startswith('no'):
        return 'nb-no'

    # fall back to the first supported region with the same language part
    prefix = lang.split('-')[0]
    for candidate in regions:
        if candidate.split('-')[0] == prefix:
            return candidate.lower()

    return 'en-us'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1
@@ -74,7 +55,7 @@ def request(query, params):
        query=urlencode({'q': query}),
        offset=offset)

    language = get_region_code(params['language'])
    language = match_language(params['language'], supported_languages).lower()

    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
Loading