Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0c7e9e62 authored by Israel Yago Pereira
Browse files

Merge branch '4308b-improve-accuracy' into 'master'

[enh] Improve ranking based on language (#3053)

See merge request e/cloud/my-spot!91
parents 13e8a21f 1df80478
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from urllib.parse import urlparse, unquote
from searx import logger
from searx.engines import engines
from searx.metrology.error_recorder import record_error
from searx import settings


# Matches punctuation, slashes, backslash, space, parentheses, underscore and
# hyphen (per the name, characters ignored when measuring content length --
# the usage is outside this view).
# The hyphen is deliberately last in the class so it is a literal: written as
# `)-_` it formed a range (U+0029..U+005F) that also matched digits and
# uppercase ASCII letters.
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()_-]', re.M | re.U)
@@ -129,13 +130,18 @@ def merge_two_infoboxes(infobox1, infobox2):
            infobox1['content'] = content2


def result_score(result):
def result_score(result, language):
    """Compute the ranking score of a merged result.

    The score is ``sum(occurrences * weight / position)`` over every entry of
    ``result['positions']``, where ``occurrences`` is the number of positions
    and ``weight`` is the product of the ``weight`` attributes of the engines
    listed in ``result['engines']`` (engines without one contribute 1.0).

    When the ``prefer_configured_language`` search setting is enabled and one
    of the dot-separated labels of the result's host name equals ``language``,
    the weight is multiplied by 1.1.

    :param result: mapping providing ``'engines'`` (names used as keys into
        ``engines``), ``'positions'`` (non-zero numbers) and ``'parsed_url'``
        (presumably a ``urllib.parse`` result; only ``hostname`` is read).
    :param language: language code compared against the host name labels;
        expected to be lower-case already. An empty value disables the boost.
    :returns: the numeric score (``0`` when there are no positions).
    """
    weight = 1.0

    for engine_name in result['engines']:
        # engines without an explicit weight leave the product unchanged
        weight *= float(getattr(engines[engine_name], 'weight', 1.0))

    # .get(): settings files written before this option existed lack the key,
    # and a missing key must not break ranking.
    if language and settings['search'].get('prefer_configured_language', False):
        # hostname (unlike netloc) is lower-cased and carries no port or
        # userinfo, so 'example.fr:8080' and 'FR.example.org' still match.
        hostname = result['parsed_url'].hostname or ''
        if language in hostname.split('.'):
            weight *= 1.1

    occurrences = len(result['positions'])

    return sum((occurrences * weight) / position for position in result['positions'])
@@ -145,9 +151,10 @@ class ResultContainer:
    """docstring for ResultContainer"""

    __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
                '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data'
                '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data',\
                '_language'

    def __init__(self):
    def __init__(self, language):
        super().__init__()
        self._merged_results = []
        self.infoboxes = []
@@ -161,6 +168,7 @@ class ResultContainer:
        self.unresponsive_engines = set()
        self.timings = []
        self.redirect_url = None
        self._language = language.lower().split('-')[0]

    def extend(self, engine_name, results):
        standard_result_count = 0
@@ -299,7 +307,7 @@ class ResultContainer:

    def order_results(self):
        for result in self._merged_results:
            score = result_score(result)
            score = result_score(result, self._language)
            result['score'] = score
            with RLock():
                for result_engine in result['engines']:
+1 −1
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ class Search:
        # init vars
        super().__init__()
        self.search_query = search_query
        self.result_container = ResultContainer()
        self.result_container = ResultContainer(search_query.lang)
        self.start_time = None
        self.actual_timeout = None

+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ search:
    default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
    ban_time_on_fail : 5 # ban time in seconds after engine errors
    max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
    prefer_configured_language: True # boost the ranking of results whose domain name contains the search language code (e.g. "fr" in fr.example.org)

server:
    port : 80
+4 −4
Original line number Diff line number Diff line
@@ -20,22 +20,22 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff',
class ResultContainerTestCase(SearxTestCase):

    def test_empty(self):
        c = ResultContainer()
        c = ResultContainer("en-US")
        self.assertEqual(c.get_ordered_results(), [])

    def test_one_result(self):
        c = ResultContainer()
        c = ResultContainer("en-US")
        c.extend('wikipedia', [fake_result()])
        self.assertEqual(c.results_length(), 1)

    def test_one_suggestion(self):
        c = ResultContainer()
        c = ResultContainer("en-US")
        c.extend('wikipedia', [fake_result(suggestion=True)])
        self.assertEqual(len(c.suggestions), 1)
        self.assertEqual(c.results_length(), 0)

    def test_result_merge(self):
        c = ResultContainer()
        c = ResultContainer("en-US")
        c.extend('wikipedia', [fake_result()])
        c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
        self.assertEqual(c.results_length(), 2)