Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d5ecda99 authored by Markus Heiser
Browse files

[mod] move language recognition to get_search_query_from_webapp



To set the language from language recognition and hold the value selected by the
client, the previous implementation creates a copy of the SearchQuery object and
manipulates the SearchQuery object by calling function replace_auto_language().

This patch tries to implement a similar functionality in a more central place,
in function get_search_query_from_webapp() when the SearchQuery object is built
up.

Additionally, this patch uses the language preferred by the client if language
recognition does not have a match; the existing implementation does not care
about client preferences and uses 'all' in case of no match.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent c03b0ea6
Loading
Loading
Loading
Loading
+2 −41
Original line number Diff line number Diff line
@@ -22,7 +22,6 @@ from searx.network import initialize as initialize_network, check_network_config
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
from searx.search.processors import PROCESSORS, initialize as initialize_processors
from searx.search.checker import initialize as initialize_checker
from searx.utils import detect_language


logger = logger.getChild('search')
@@ -40,57 +39,19 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
        initialize_checker()


def replace_auto_language(search_query: SearchQuery):
    """Resolve the ``"auto"`` language alias of a search query in place.

    When `search_query.lang` is anything other than ``"auto"`` the query is
    left untouched.  Otherwise the language of `search_query.query` is
    detected with :py:obj:`searx.utils.detect_language` (called with
    ``threshold=0.3`` and ``only_search_languages=True``) and:

    * `search_query.lang` becomes the detected language, or ``"all"`` when
      nothing was detected,
    * `search_query.locale` becomes the matching `babel.Locale`, or ``None``
      when nothing was detected or babel does not know the locale.
    """
    if search_query.lang == 'auto':
        guessed = detect_language(search_query.query, threshold=0.3, only_search_languages=True)

        # None stays in place for both "nothing detected" and "unknown locale"
        locale = None
        if guessed is None:
            # fallback to 'all' if no language has been detected
            search_query.lang = 'all'
        else:
            search_query.lang = guessed
            try:
                locale = babel.Locale.parse(search_query.lang)
            except babel.core.UnknownLocaleError:
                pass
        search_query.locale = locale


class Search:
    """Search information container"""

    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"

    def __init__(self, search_query: SearchQuery):
        """Initialize the Search

        search_query is copied
        """
        """Initialize the Search"""
        # init vars
        super().__init__()
        self.search_query = search_query
        self.result_container = ResultContainer()
        self.start_time = None
        self.actual_timeout = None
        self.search_query = copy(search_query)
        self.update_search_query(self.search_query)

    def update_search_query(self, search_query: SearchQuery):
        """Update search_query.

        call replace_auto_language to replace the "auto" language
        """
        replace_auto_language(search_query)

    def search_external_bang(self):
        """
+30 −2
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from searx.query import RawTextQuery
from searx.engines import categories, engines
from searx.search import SearchQuery, EngineRef
from searx.preferences import Preferences, is_locked
from searx.utils import detect_language


# remove duplicate queries.
@@ -214,7 +215,27 @@ def parse_engine_data(form):

def get_search_query_from_webapp(
    preferences: Preferences, form: Dict[str, str]
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
    """Assemble data from preferences and request.form (from the HTML form) needed
    in a search query.

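    The returned tuple consists of:

    1. instance of :py:obj:`searx.search.SearchQuery`
    2. instance of :py:obj:`searx.query.RawTextQuery`
    3. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_unknown``)
    4. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_notoken``)
    5. string with the *selected locale* of the query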

    About language/locale: if the client selects the alias ``auto``, the
    ``SearchQuery`` object is built using the :py:obj:`detected language
    <searx.utils.detect_language>`.  If language recognition does not have a
    match, the language preferred by the :py:obj:`Preferences.client` is used.
    If the client does not have a preference, the default ``all`` is used.

    The *selected locale* in the tuple always represents the selected
    language/locale and might differ from the language recognition.

    """
    # no text for the query ?
    if not form.get('q'):
        raise SearxParameterException('q', '')
@@ -229,13 +250,19 @@ def get_search_query_from_webapp(
    # set query
    query = raw_text_query.getQuery()
    query_pageno = parse_pageno(form)
    query_lang = parse_lang(preferences, form, raw_text_query)
    query_safesearch = parse_safesearch(preferences, form)
    query_time_range = parse_time_range(form)
    query_timeout = parse_timeout(form, raw_text_query)
    external_bang = raw_text_query.external_bang
    engine_data = parse_engine_data(form)

    query_lang = parse_lang(preferences, form, raw_text_query)
    selected_locale = query_lang

    if query_lang == 'auto':
        query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
        query_lang = query_lang or preferences.client.locale_tag or 'all'

    if not is_locked('categories') and raw_text_query.specific:
        # if engines are calculated from query,
        # set categories by using that information
@@ -265,4 +292,5 @@ def get_search_query_from_webapp(
        raw_text_query,
        query_engineref_list_unknown,
        query_engineref_list_notoken,
        selected_locale,
    )
+6 −14
Original line number Diff line number Diff line
@@ -84,6 +84,7 @@ from searx.webutils import (
from searx.webadapter import (
    get_search_query_from_webapp,
    get_selected_categories,
    parse_lang,
)
from searx.utils import (
    html_to_text,
@@ -440,11 +441,7 @@ def render(template_name: str, **kwargs):
        kwargs['rtl'] = True

    if 'current_language' not in kwargs:
        _locale = request.preferences.get_value('language')
        if _locale in ('auto', 'all'):
            kwargs['current_language'] = _locale
        else:
            kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
        kwargs['current_language'] = parse_lang(request.preferences, {}, RawTextQuery('', []))

    # values from settings
    kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
@@ -678,7 +675,9 @@ def search():
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
        search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp(
            request.preferences, request.form
        )
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)  # pylint: disable=redefined-outer-name

@@ -809,13 +808,6 @@ def search():
        )
    )

    if search_query.lang in ('auto', 'all'):
        current_language = search_query.lang
    else:
        current_language = match_locale(
            search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
        )

    # search_query.lang contains the user choice (all, auto, en, ...)
    # when the user choice is "auto", search.search_query.lang contains the detected language
    # otherwise it is equal to search_query.lang
@@ -838,7 +830,7 @@ def search():
            result_container.unresponsive_engines
        ),
        current_locale = request.preferences.get_value("locale"),
        current_language = current_language,
        current_language = selected_locale,
        search_language = match_locale(
            search.search_query.lang,
            settings['search']['languages'],