Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 8d47142f authored by Adam Tauber's avatar Adam Tauber Committed by GitHub
Browse files

Merge pull request #2189 from dalf/architecture-clean-up

Architecture clean up
parents c2a6f145 f2f3300b
Loading
Loading
Loading
Loading
+8 −34
Original line number Diff line number Diff line
@@ -20,9 +20,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
import re

from searx.languages import language_codes
from searx.engines import (
    categories, engines, engine_shortcuts
)
from searx.engines import categories, engines, engine_shortcuts
from searx.search import EngineRef


VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@@ -40,7 +39,7 @@ class RawTextQuery:
            self.disabled_engines = disabled_engines

        self.query_parts = []
        self.engines = []
        self.enginerefs = []
        self.languages = []
        self.timeout_limit = None
        self.external_bang = None
@@ -135,24 +134,19 @@ class RawTextQuery:
                    parse_next = True
                    engine_name = engine_shortcuts[prefix]
                    if engine_name in engines:
                        self.engines.append({'category': 'none',
                                             'name': engine_name,
                                             'from_bang': True})
                        self.enginerefs.append(EngineRef(engine_name, 'none', True))

                # check if prefix is equal with engine name
                elif prefix in engines:
                    parse_next = True
                    self.engines.append({'category': 'none',
                                         'name': prefix,
                                         'from_bang': True})
                    self.enginerefs.append(EngineRef(prefix, 'none', True))

                # check if prefix is equal with categorie name
                elif prefix in categories:
                    # using all engines for that search, which
                    # are declared under that categorie name
                    parse_next = True
                    self.engines.extend({'category': prefix,
                                         'name': engine.name}
                    self.enginerefs.extend(EngineRef(engine.name, prefix)
                                           for engine in categories[prefix]
                                           if (engine.name, prefix) not in self.disabled_engines)

@@ -178,23 +172,3 @@ class RawTextQuery:
    def getFullQuery(self):
        """Return the full query string: every entry of ``self.query_parts`` concatenated."""
        # the parts are joined with no separator, so any whitespace must
        # already be contained in the parts themselves
        return ''.join(self.query_parts)


class SearchQuery:
    """Plain holder for the parameters of one search (query text, engines, language, ...)."""

    def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
                 timeout_limit=None, preferences=None, external_bang=None):
        # an empty string, the string 'None' and None itself all mean "no time range"
        if time_range in ('', 'None', None):
            time_range = None

        self.query = query
        self.engines = engines
        self.categories = categories
        self.lang = lang
        self.safesearch = safesearch
        self.pageno = pageno
        self.time_range = time_range
        self.timeout_limit = timeout_limit
        self.preferences = preferences
        self.external_bang = external_bang

    def __str__(self):
        # "<query>;<engines>"
        return ";".join((self.query, str(self.engines)))
+58 −179
Original line number Diff line number Diff line
@@ -15,27 +15,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''

import typing
import gc
import sys
import threading
from time import time
from uuid import uuid4
from _thread import start_new_thread

from flask_babel import gettext
import requests.exceptions
import searx.poolrequests as requests_lib
from searx.engines import (
    categories, engines, settings
)
from searx.engines import engines, settings
from searx.answerers import ask
from searx.external_bang import get_bang_url
from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException


logger = logger.getChild('search')
@@ -53,6 +48,49 @@ else:
        exit(1)


class EngineRef:
    """Reference to one engine selected for a search.

    Attributes (stored in ``__slots__``, so no other attribute can be set):
        name: name of the engine.
        category: category the engine was selected under.
        from_bang: flag given by the caller; the query parser passes ``True``
            for engines selected by a prefix in the query text.
    """

    __slots__ = 'name', 'category', 'from_bang'

    def __init__(self, name: str, category: str, from_bang: bool = False):
        self.name = name
        self.category = category
        self.from_bang = from_bang

    def __repr__(self):
        # str() of a list uses repr() on its items: without this method a
        # list of EngineRef prints as opaque "<EngineRef object at 0x...>"
        return "EngineRef({!r}, {!r}, {!r})".format(self.name, self.category, self.from_bang)

    def __str__(self):
        return "(" + self.name + "," + self.category + "," + str(self.from_bang) + ")"


class SearchQuery:
    """Holder for every parameter of one search: query text, engine references,
    categories, language, safesearch level, page number, time range,
    timeout limit and external bang.

    The class only stores the values it is given; it does no validation.
    """

    __slots__ = (
        'query', 'engineref_list', 'categories', 'lang', 'safesearch', 'pageno', 'time_range',
        'timeout_limit', 'external_bang',
    )

    def __init__(self,
                 query: str,
                 engineref_list: typing.List[EngineRef],
                 categories: typing.List[str],
                 lang: str,
                 safesearch: bool,
                 pageno: int,
                 time_range: typing.Optional[str],
                 timeout_limit: typing.Optional[float]=None,
                 external_bang: typing.Optional[str]=False):
        # what is searched, and where
        self.query = query
        self.engineref_list = engineref_list
        self.categories = categories
        # how the search is tuned
        self.lang = lang
        self.safesearch = safesearch
        self.pageno = pageno
        self.time_range = time_range
        self.timeout_limit = timeout_limit
        self.external_bang = external_bang

    def __str__(self):
        # "<query>;<engineref_list>"
        return ";".join((self.query, str(self.engineref_list)))


def send_http_request(engine, request_params):
    # create dictionary which contain all
    # informations about the request
@@ -247,167 +285,11 @@ def default_request_params():
    }


# remove duplicate queries.
# FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music'
def deduplicate_query_engines(query_engines):
    """Drop engines sharing the same (category, name) pair.

    Each engine is a dict with at least the keys ``"category"`` and ``"name"``.
    For duplicates, the last occurrence is kept, at the position of the first
    one. The result is a view over the values of the de-duplication dict.
    """
    by_key = {}
    for engine in query_engines:
        by_key[engine["category"] + '|' + engine["name"]] = engine
    return by_key.values()


def get_search_query_from_webapp(preferences, form):
    """Build a SearchQuery from the user preferences and a submitted form.

    The form fields read here are 'q', 'pageno', 'language', 'safesearch',
    'time_range', 'timeout_limit', 'categories', 'engines' and every field
    whose name starts with 'category_'.

    Returns:
        a tuple ``(SearchQuery, RawTextQuery)``.

    Raises:
        SearxParameterException: if 'q' is missing or empty, or if 'pageno',
            the language, 'safesearch', 'time_range' or 'timeout_limit' is
            invalid.
    """
    # no text for the query ?
    if not form.get('q'):
        raise SearxParameterException('q', '')

    # set blocked engines
    disabled_engines = preferences.engines.get_disabled()

    # parse the query: it may contain tags which change
    # the search engine or the search language
    raw_text_query = RawTextQuery(form['q'], disabled_engines)

    # set query
    query = raw_text_query.getQuery()

    # get and check page number
    pageno_param = form.get('pageno', '1')
    if not pageno_param.isdigit() or int(pageno_param) < 1:
        raise SearxParameterException('pageno', pageno_param)
    query_pageno = int(pageno_param)

    # get language
    # priority: language set in the query, then in the form, then in the preferences
    # TODO support search with multiple languages
    if len(raw_text_query.languages):
        query_lang = raw_text_query.languages[-1]
    elif 'language' in form:
        query_lang = form.get('language')
    else:
        query_lang = preferences.get_value('language')

    # check language
    if not VALID_LANGUAGE_CODE.match(query_lang):
        raise SearxParameterException('language', query_lang)

    # get safesearch
    if 'safesearch' in form:
        query_safesearch = form.get('safesearch')
        # first check: the form value must be made of digits
        if not query_safesearch.isdigit():
            raise SearxParameterException('safesearch', query_safesearch)
        query_safesearch = int(query_safesearch)
    else:
        query_safesearch = preferences.get_value('safesearch')

    # second check: the value must be 0, 1 or 2
    if query_safesearch < 0 or query_safesearch > 2:
        raise SearxParameterException('safesearch', query_safesearch)

    # get time_range
    query_time_range = form.get('time_range')

    # check time_range
    if query_time_range not in ('None', None, '', 'day', 'week', 'month', 'year'):
        raise SearxParameterException('time_range', query_time_range)

    # query_engines
    # NOTE: this is the very list held by raw_text_query, not a copy:
    # the extend() calls below also change raw_text_query.engines
    query_engines = raw_text_query.engines

    # timeout_limit: the value from the query text wins over the form value
    query_timeout = raw_text_query.timeout_limit
    if query_timeout is None and 'timeout_limit' in form:
        raw_time_limit = form.get('timeout_limit')
        if raw_time_limit in ['None', '']:
            raw_time_limit = None
        else:
            try:
                query_timeout = float(raw_time_limit)
            except ValueError:
                raise SearxParameterException('timeout_limit', raw_time_limit)

    # query_categories
    query_categories = []

    # if the engines were selected by the query text,
    # derive the categories from these engines
    if query_engines and raw_text_query.specific:
        additional_categories = set()
        for engine in query_engines:
            if 'from_bang' in engine and engine['from_bang']:
                additional_categories.add('none')
            else:
                additional_categories.add(engine['category'])
        query_categories = list(additional_categories)

    # otherwise, use the categories from the form / preferences to
    # work out which engines should be used
    else:
        # set categories/engines
        load_default_categories = True
        for pd_name, pd in form.items():
            if pd_name == 'categories':
                query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
            elif pd_name == 'engines':
                # each known engine is filed under the first of its categories
                pd_engines = [{'category': engines[engine].categories[0],
                               'name': engine}
                              for engine in map(str.strip, pd.split(',')) if engine in engines]
                if pd_engines:
                    query_engines.extend(pd_engines)
                    load_default_categories = False
            elif pd_name.startswith('category_'):
                # the category name follows the 9 characters of 'category_'
                category = pd_name[9:]

                # if category is not found in list, skip
                if category not in categories:
                    continue

                if pd != 'off':
                    # add category to list
                    query_categories.append(category)
                elif category in query_categories:
                    # remove category from list if property is set to 'off'
                    query_categories.remove(category)

        if not load_default_categories:
            # engines were given explicitly: without explicit categories,
            # use the categories of these engines
            if not query_categories:
                query_categories = list(set(engine['category']
                                            for engine in query_engines))
        else:
            # if no category is specified for this search,
            # use the user-defined default configuration
            # (which is stored in a cookie)
            if not query_categories:
                cookie_categories = preferences.get_value('categories')
                for ccateg in cookie_categories:
                    if ccateg in categories:
                        query_categories.append(ccateg)

            # if still no category is specified, use 'general'
            # as the default category
            if not query_categories:
                query_categories = ['general']

            # use all the engines declared under the selected
            # categories, except the disabled ones
            for categ in query_categories:
                query_engines.extend({'category': categ,
                                      'name': engine.name}
                                     for engine in categories[categ]
                                     if (engine.name, categ) not in disabled_engines)

    query_engines = deduplicate_query_engines(query_engines)
    external_bang = raw_text_query.external_bang

    return (SearchQuery(query, query_engines, query_categories,
                        query_lang, query_safesearch, query_pageno,
                        query_time_range, query_timeout, preferences,
                        external_bang=external_bang),
            raw_text_query)


class Search:
    """Search information container"""

    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"

    def __init__(self, search_query):
        # init vars
        super().__init__()
@@ -444,9 +326,6 @@ class Search:
        return False

    def _is_accepted(self, engine_name, engine):
        if not self.search_query.preferences.validate_token(engine):
            return False

        # skip suspended engines
        if engine.suspend_end_time >= time():
            logger.debug('Engine currently suspended: %s', engine_name)
@@ -462,13 +341,13 @@ class Search:

        return True

    def _get_params(self, selected_engine, user_agent):
        if selected_engine['name'] not in engines:
    def _get_params(self, engineref, user_agent):
        if engineref.name not in engines:
            return None, None

        engine = engines[selected_engine['name']]
        engine = engines[engineref.name]

        if not self._is_accepted(selected_engine['name'], engine):
        if not self._is_accepted(engineref.name, engine):
            return None, None

        # set default request parameters
@@ -485,15 +364,13 @@ class Search:
            request_params['safesearch'] = self.search_query.safesearch
            request_params['time_range'] = self.search_query.time_range

        request_params['category'] = selected_engine['category']
        request_params['category'] = engineref.category
        request_params['pageno'] = self.search_query.pageno

        return request_params, engine.timeout

    # do search-request
    def _get_requests(self):
        global number_of_searches

        # init vars
        requests = []

@@ -505,14 +382,14 @@ class Search:
        default_timeout = 0

        # start search-reqest for all selected engines
        for selected_engine in self.search_query.engines:
        for engineref in self.search_query.engineref_list:
            # set default request parameters
            request_params, engine_timeout = self._get_params(selected_engine, user_agent)
            request_params, engine_timeout = self._get_params(engineref, user_agent)
            if request_params is None:
                continue

            # append request to list
            requests.append((selected_engine['name'], self.search_query.query, request_params))
            requests.append((engineref.name, self.search_query.query, request_params))

            # update default_timeout
            default_timeout = max(default_timeout, engine_timeout)
@@ -535,7 +412,7 @@ class Search:
            actual_timeout = min(query_timeout, max_request_timeout)

        logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
                     .format(self.actual_timeout, default_timeout, query_timeout, max_request_timeout))
                     .format(actual_timeout, default_timeout, query_timeout, max_request_timeout))

        return requests, actual_timeout

@@ -567,6 +444,8 @@ class Search:
class SearchWithPlugins(Search):
    """Similar to the Search class but call the plugins."""

    __slots__ = 'ordered_plugin_list', 'request'

    def __init__(self, search_query, ordered_plugin_list, request):
        super().__init__(search_query)
        self.ordered_plugin_list = ordered_plugin_list

searx/webadapter.py

0 → 100644
+235 −0
Original line number Diff line number Diff line
from typing import Dict, List, Optional, Tuple
from searx.exceptions import SearxParameterException
from searx.query import RawTextQuery, VALID_LANGUAGE_CODE
from searx.engines import categories, engines
from searx.search import SearchQuery, EngineRef
from searx.preferences import Preferences


# remove duplicate queries.
# FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music'
def deduplicate_engineref_list(engineref_list: List['EngineRef']) -> List['EngineRef']:
    """Drop engine references sharing the same (category, name) pair.

    For duplicates, the last occurrence is kept, at the position of the
    first one.

    Returns:
        a new list, as the annotation promises (a bare ``dict.values()`` view
        supports neither indexing nor list methods).
    """
    engineref_dict = {q.category + '|' + q.name: q for q in engineref_list}
    return list(engineref_dict.values())


def validate_engineref_list(engineref_list: List[EngineRef], preferences: Preferences)\
        -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]:
    """Sort the engine references into three lists according to the preferences.

    Returns:
        List[EngineRef]: references to known engines whose token is validated
        List[EngineRef]: references to engines missing from ``engines``
        List[EngineRef]: references to known engines rejected by
            ``preferences.validate_token``
    """
    accepted = []
    missing = []
    tokenless = []
    for engineref in engineref_list:
        # pick the list this reference belongs to; the token is only
        # checked for engines which exist
        if engineref.name not in engines:
            bucket = missing
        elif not preferences.validate_token(engines[engineref.name]):
            bucket = tokenless
        else:
            bucket = accepted
        bucket.append(engineref)
    return accepted, missing, tokenless


def parse_pageno(form: Dict[str, str]) -> int:
    """Return the page number from the 'pageno' form field (1 when absent).

    Raises:
        SearxParameterException: if the field is not made of digits or is
            lower than 1.
    """
    raw_pageno = form.get('pageno', '1')
    if raw_pageno.isdigit():
        pageno = int(raw_pageno)
        if pageno >= 1:
            return pageno
    raise SearxParameterException('pageno', raw_pageno)


def parse_lang(preferences: Preferences, form: Dict[str, str], raw_text_query: RawTextQuery) -> str:
    """Return the search language.

    The language comes from, by priority: the last language found in the
    query text, the 'language' form field, the 'language' preference.

    Raises:
        SearxParameterException: if the language does not match
            ``VALID_LANGUAGE_CODE``.
    """
    # TODO support search with multiple languages
    if len(raw_text_query.languages):
        lang = raw_text_query.languages[-1]
    elif 'language' in form:
        lang = form.get('language')
    else:
        lang = preferences.get_value('language')

    if VALID_LANGUAGE_CODE.match(lang):
        return lang
    raise SearxParameterException('language', lang)


def parse_safesearch(preferences: Preferences, form: Dict[str, str]) -> int:
    """Return the safesearch level: the 'safesearch' form field when present,
    otherwise the 'safesearch' preference.

    Raises:
        SearxParameterException: if the form value is not made of digits, or
            if the level is not between 0 and 2.
    """
    if 'safesearch' not in form:
        level = preferences.get_value('safesearch')
    else:
        raw_level = form.get('safesearch')
        # first check: only digits are accepted from the form
        if not raw_level.isdigit():
            raise SearxParameterException('safesearch', raw_level)
        level = int(raw_level)

    # second check: the range, whatever the origin of the value
    if level < 0 or level > 2:
        raise SearxParameterException('safesearch', level)

    return level


def parse_time_range(form: Dict[str, str]) -> str:
    """Return the 'time_range' form field: 'day', 'week', 'month' or 'year'.

    ``None`` is returned when the field is absent, empty or the string 'None'.

    Raises:
        SearxParameterException: for any other value.
    """
    time_range = form.get('time_range')
    # '' and 'None' both mean "no time range"
    if time_range in ('', 'None'):
        time_range = None
    accepted = (None, 'day', 'week', 'month', 'year')
    if time_range in accepted:
        return time_range
    raise SearxParameterException('time_range', time_range)


def parse_timeout(form: Dict[str, str], raw_text_query: 'RawTextQuery') -> Optional[float]:
    """Return the timeout limit of the search, or None when there is none.

    The timeout found in the query text (``raw_text_query.timeout_limit``)
    wins; the 'timeout_limit' form field is only read when the query text
    sets no timeout.

    Raises:
        SearxParameterException: if the form value is not a valid float.
    """
    query_timeout = raw_text_query.timeout_limit
    if query_timeout is not None:
        # do not fall through: the timeout from the query text must be
        # returned, not silently replaced by None
        return query_timeout

    raw_time_limit = form.get('timeout_limit')
    # field absent, empty or the string 'None': no timeout limit
    if raw_time_limit in (None, 'None', ''):
        return None
    try:
        return float(raw_time_limit)
    except ValueError:
        raise SearxParameterException('timeout_limit', raw_time_limit)


def parse_specific(raw_text_query: RawTextQuery) -> Tuple[List[EngineRef], List[str]]:
    """Return the engine references found in the query text, with their categories.

    The category of a reference is 'none' when its ``from_bang`` flag is set,
    otherwise the category stored in the reference. Each category is listed
    once.
    """
    query_engineref_list = raw_text_query.enginerefs
    category_set = {'none' if engineref.from_bang else engineref.category
                    for engineref in raw_text_query.enginerefs}
    return query_engineref_list, list(category_set)


def parse_category_form(query_categories: List[str], name: str, value: str) -> None:
    """Update ``query_categories`` in place from one form field.

    * field 'categories': ``value`` is a comma separated list; every item
      which is a known category is appended.
    * field 'category_<name>': the category is appended unless ``value`` is
      'off', in which case one occurrence of it is removed from the list.
      Unknown categories are ignored.
    * any other field is ignored.
    """
    if name == 'categories':
        for categ in map(str.strip, value.split(',')):
            if categ in categories:
                query_categories.append(categ)
        return

    prefix = 'category_'
    if not name.startswith(prefix):
        return

    category = name[len(prefix):]
    # unknown category: nothing to do
    if category not in categories:
        return

    if value != 'off':
        query_categories.append(category)
    elif category in query_categories:
        # 'off' un-selects a category selected earlier
        query_categories.remove(category)


def get_selected_categories(preferences: Preferences, form: Dict[str, str]) -> List[str]:
    """Return the categories selected for a search.

    The categories come from, by priority: the form fields (``form`` may be
    None), the 'categories' preference, and finally ``['general']``.
    """
    selected = []

    if form is not None:
        for field_name, field_value in form.items():
            parse_category_form(selected, field_name, field_value)

    # nothing from the form: fall back to the user-defined
    # default configuration (which is stored in a cookie)
    if not selected:
        selected.extend(preferences.get_value('categories'))

    # still nothing: 'general' is the default category
    if not selected:
        selected = ['general']

    return selected


def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engines: List[str])\
        -> Tuple[List[EngineRef], List[str]]:
    """Work out the engines and the categories of a search from the form.

    * when the 'engines' field names at least one known engine, only these
      engines are used; without explicit categories in the form, the
      categories are those of these engines.
    * otherwise the categories come from the form, else from
      ``get_selected_categories``, and every engine of these categories is
      used unless it is disabled.

    Args:
        preferences: user preferences, used for the default categories.
        form: submitted form fields.
        disabled_engines: collection tested with ``(engine name, category)``
            tuples. NOTE(review): the ``List[str]`` annotation looks
            inaccurate -- confirm against ``preferences.engines.get_disabled``.

    Returns:
        the engine references and the category names.
    """
    query_engineref_list = []
    query_categories = []

    # set categories/engines
    load_default_categories = True
    for pd_name, pd in form.items():
        if pd_name == 'engines':
            # each known engine is filed under the first of its categories
            pd_engines = [EngineRef(engine_name, engines[engine_name].categories[0])
                          for engine_name in map(str.strip, pd.split(',')) if engine_name in engines]
            if pd_engines:
                query_engineref_list.extend(pd_engines)
                load_default_categories = False
        else:
            parse_category_form(query_categories, pd_name, pd)

    if not load_default_categories:
        if not query_categories:
            # EngineRef is a slotted object, not a dict: its category must be
            # read as an attribute (subscripting it raises TypeError)
            query_categories = list(set(engineref.category
                                        for engineref in query_engineref_list))
    else:
        if not query_categories:
            query_categories = get_selected_categories(preferences, None)

        # use all the engines declared under the selected
        # categories, except the disabled ones
        for categ in query_categories:
            query_engineref_list.extend(EngineRef(engine.name, categ)
                                        for engine in categories[categ]
                                        if (engine.name, categ) not in disabled_engines)

    return query_engineref_list, query_categories


def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\
        -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
    """Build a SearchQuery from the user preferences and a submitted form.

    Returns:
        SearchQuery: the search to run, restricted to the validated engines
        RawTextQuery: the parsed 'q' form field
        List[EngineRef]: references to unknown engines
        List[EngineRef]: references to engines without a valid token

    Raises:
        SearxParameterException: if the 'q' field is missing or empty, or if
            one of the parse_* helpers rejects a parameter.
    """
    # a search needs some text
    if not form.get('q'):
        raise SearxParameterException('q', '')

    # engines blocked by the user
    disabled = preferences.engines.get_disabled()

    # the query text may contain tags which change
    # the search engines or the search language
    raw_text_query = RawTextQuery(form['q'], disabled)

    # the parameters are parsed in this order: the first
    # invalid one is the one which gets reported
    text = raw_text_query.getQuery()
    pageno = parse_pageno(form)
    lang = parse_lang(preferences, form, raw_text_query)
    safesearch = parse_safesearch(preferences, form)
    time_range = parse_time_range(form)
    timeout = parse_timeout(form, raw_text_query)
    external_bang = raw_text_query.external_bang

    engines_from_query = raw_text_query.enginerefs and raw_text_query.specific
    if engines_from_query:
        # the engines are given by the query text:
        # the categories are derived from them
        enginerefs, query_categories = parse_specific(raw_text_query)
    else:
        # otherwise the categories decide which engines are used
        enginerefs, query_categories = parse_generic(preferences, form, disabled)

    valid, unknown, no_token = validate_engineref_list(
        deduplicate_engineref_list(enginerefs), preferences)

    search_query = SearchQuery(text, valid, query_categories,
                               lang, safesearch, pageno,
                               time_range, timeout,
                               external_bang=external_bang)
    return search_query, raw_text_query, unknown, no_token
+7 −21

File changed.

Preview size limit exceeded, changes collapsed.

+19 −65

File changed.

Preview size limit exceeded, changes collapsed.

Loading