Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7acd7ffc authored by Alexandre Flament's avatar Alexandre Flament
Browse files

[enh] rewrite and enhance metrics

parent aae7830d
Loading
Loading
Loading
Loading
+0 −114
Original line number Diff line number Diff line
@@ -21,7 +21,6 @@ import threading
from os.path import realpath, dirname
from babel.localedata import locale_identifiers
from urllib.parse import urlparse
from flask_babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger
@@ -136,22 +135,6 @@ def load_engine(engine_data):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)))

    engine.stats = {
        'sent_search_count': 0,  # sent search
        'search_count': 0,  # succesful search
        'result_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    engine_type = getattr(engine, 'engine_type', 'online')

    if engine_type != 'offline':
        engine.stats['page_load_time'] = 0
        engine.stats['page_load_count'] = 0

    # tor related settings
    if settings['outgoing'].get('using_tor_proxy'):
        # use onion url if using tor.
@@ -175,103 +158,6 @@ def load_engine(engine_data):
    return engine


def to_percentage(stats, maxvalue):
    for engine_stat in stats:
        if maxvalue:
            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
        else:
            engine_stat['percentage'] = 0
    return stats


def get_engines_stats(preferences):
    # TODO refactor
    pageloads = []
    engine_times = []
    results = []
    scores = []
    errors = []
    scores_per_result = []

    max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
    for engine in engines.values():
        if not preferences.validate_token(engine):
            continue

        if engine.stats['search_count'] == 0:
            continue

        results_num = \
            engine.stats['result_count'] / float(engine.stats['search_count'])

        if engine.stats['engine_time_count'] != 0:
            this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count'])  # noqa
        else:
            this_engine_time = 0

        if results_num:
            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
            score_per_result = score / results_num
        else:
            score = score_per_result = 0.0

        if engine.engine_type != 'offline':
            load_times = 0
            if engine.stats['page_load_count'] != 0:
                load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
            max_pageload = max(load_times, max_pageload)
            pageloads.append({'avg': load_times, 'name': engine.name})

        max_engine_times = max(this_engine_time, max_engine_times)
        max_results = max(results_num, max_results)
        max_score = max(score, max_score)
        max_score_per_result = max(score_per_result, max_score_per_result)
        max_errors = max(max_errors, engine.stats['errors'])

        engine_times.append({'avg': this_engine_time, 'name': engine.name})
        results.append({'avg': results_num, 'name': engine.name})
        scores.append({'avg': score, 'name': engine.name})
        errors.append({'avg': engine.stats['errors'], 'name': engine.name})
        scores_per_result.append({
            'avg': score_per_result,
            'name': engine.name
        })

    pageloads = to_percentage(pageloads, max_pageload)
    engine_times = to_percentage(engine_times, max_engine_times)
    results = to_percentage(results, max_results)
    scores = to_percentage(scores, max_score)
    scores_per_result = to_percentage(scores_per_result, max_score_per_result)
    errors = to_percentage(errors, max_errors)

    return [
        (
            gettext('Engine time (sec)'),
            sorted(engine_times, key=itemgetter('avg'))
        ),
        (
            gettext('Page loads (sec)'),
            sorted(pageloads, key=itemgetter('avg'))
        ),
        (
            gettext('Number of results'),
            sorted(results, key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Scores'),
            sorted(scores, key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Scores per result'),
            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Errors'),
            sorted(errors, key=itemgetter('avg'), reverse=True)
        ),
    ]


def load_engines(engine_list):
    global engines, engine_shortcuts
    engines.clear()
+206 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: AGPL-3.0-or-later

import typing
import math
import contextlib
from timeit import default_timer
from operator import itemgetter

from searx.engines import engines
from .models import HistogramStorage, CounterStorage
from .error_recorder import count_error, count_exception, errors_per_engines

__all__ = ["initialize",
           "get_engines_stats", "get_engine_errors",
           "histogram", "histogram_observe", "histogram_observe_time",
           "counter", "counter_inc", "counter_add",
           "count_error", "count_exception"]


ENDPOINTS = {'search'}


histogram_storage: typing.Optional[HistogramStorage] = None
counter_storage: typing.Optional[CounterStorage] = None


@contextlib.contextmanager
def histogram_observe_time(*args):
    h = histogram_storage.get(*args)
    before = default_timer()
    yield before
    duration = default_timer() - before
    if h:
        h.observe(duration)
    else:
        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")


def histogram_observe(duration, *args):
    histogram_storage.get(*args).observe(duration)


def histogram(*args, raise_on_not_found=True):
    h = histogram_storage.get(*args)
    if raise_on_not_found and h is None:
        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
    return h


def counter_inc(*args):
    counter_storage.add(1, *args)


def counter_add(value, *args):
    counter_storage.add(value, *args)


def counter(*args):
    return counter_storage.get(*args)


def initialize(engine_names=None):
    """
    Initialize metrics
    """
    global counter_storage, histogram_storage

    counter_storage = CounterStorage()
    histogram_storage = HistogramStorage()

    # max_timeout = max of all the engine.timeout
    max_timeout = 2
    for engine_name in (engine_names or engines):
        if engine_name in engines:
            max_timeout = max(max_timeout, engines[engine_name].timeout)

    # histogram configuration
    histogram_width = 0.1
    histogram_size = int(1.5 * max_timeout / histogram_width)

    # engines
    for engine_name in (engine_names or engines):
        # search count
        counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
        counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
        # global counter of errors
        counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
        # score of the engine
        counter_storage.configure('engine', engine_name, 'score')
        # result count per requests
        histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
        # time doing HTTP requests
        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
        # total time
        # .time.request and ...response times may overlap .time.http time.
        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')


def get_engine_errors(engline_list):
    result = {}
    engine_names = list(errors_per_engines.keys())
    engine_names.sort()
    for engine_name in engine_names:
        if engine_name not in engline_list:
            continue

        error_stats = errors_per_engines[engine_name]
        sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
        r = []
        for context, count in sorted_context_count_list:
            percentage = round(20 * count / sent_search_count) * 5
            r.append({
                'filename': context.filename,
                'function': context.function,
                'line_no': context.line_no,
                'code': context.code,
                'exception_classname': context.exception_classname,
                'log_message': context.log_message,
                'log_parameters': context.log_parameters,
                'secondary': context.secondary,
                'percentage': percentage,
            })
        result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage'])
    return result


def to_percentage(stats, maxvalue):
    for engine_stat in stats:
        if maxvalue:
            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
        else:
            engine_stat['percentage'] = 0
    return stats


def get_engines_stats(engine_list):
    global counter_storage, histogram_storage

    assert counter_storage is not None
    assert histogram_storage is not None

    list_time = []
    list_time_http = []
    list_time_total = []
    list_result_count = []
    list_error_count = []
    list_scores = []
    list_scores_per_result = []

    max_error_count = max_http_time = max_time_total = max_result_count = max_score = None  # noqa
    for engine_name in engine_list:
        error_count = counter('engine', engine_name, 'search', 'count', 'error')

        if counter('engine', engine_name, 'search', 'count', 'sent') > 0:
            list_error_count.append({'avg': error_count, 'name': engine_name})
            max_error_count = max(error_count, max_error_count or 0)

        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
        if successful_count == 0:
            continue

        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
        time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
        time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
        result_count = result_count_sum / float(successful_count)

        if result_count:
            score = counter('engine', engine_name, 'score')  # noqa
            score_per_result = score / float(result_count_sum)
        else:
            score = score_per_result = 0.0

        max_time_total = max(time_total, max_time_total or 0)
        max_http_time = max(time_http, max_http_time or 0)
        max_result_count = max(result_count, max_result_count or 0)
        max_score = max(score, max_score or 0)

        list_time.append({'total': round(time_total, 1),
                          'http': round(time_http, 1),
                          'name': engine_name,
                          'processing': round(time_total - time_http, 1)})
        list_time_total.append({'avg': time_total, 'name': engine_name})
        list_time_http.append({'avg': time_http, 'name': engine_name})
        list_result_count.append({'avg': result_count, 'name': engine_name})
        list_scores.append({'avg': score, 'name': engine_name})
        list_scores_per_result.append({'avg': score_per_result, 'name': engine_name})

    list_time = sorted(list_time, key=itemgetter('total'))
    list_time_total = sorted(to_percentage(list_time_total, max_time_total), key=itemgetter('avg'))
    list_time_http = sorted(to_percentage(list_time_http, max_http_time), key=itemgetter('avg'))
    list_result_count = sorted(to_percentage(list_result_count, max_result_count), key=itemgetter('avg'), reverse=True)
    list_scores = sorted(list_scores, key=itemgetter('avg'), reverse=True)
    list_scores_per_result = sorted(list_scores_per_result, key=itemgetter('avg'), reverse=True)
    list_error_count = sorted(to_percentage(list_error_count, max_error_count), key=itemgetter('avg'), reverse=True)

    return {
        'time': list_time,
        'max_time': math.ceil(max_time_total or 0),
        'time_total': list_time_total,
        'time_http': list_time_http,
        'result_count': list_result_count,
        'scores': list_scores,
        'scores_per_result': list_scores_per_result,
        'error_count': list_error_count,
    }
+2 −5
Original line number Diff line number Diff line
import typing
import inspect
import logging
from json import JSONDecodeError
from urllib.parse import urlparse
from httpx import HTTPError, HTTPStatusError
@@ -9,8 +8,6 @@ from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathExcepti
from searx import logger


logging.basicConfig(level=logging.INFO)

errors_per_engines = {}


@@ -124,7 +121,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame
    return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)


def record_exception(engine_name: str, exc: Exception) -> None:
def count_exception(engine_name: str, exc: Exception) -> None:
    framerecords = inspect.trace()
    try:
        exception_classname = get_exception_classname(exc)
@@ -135,7 +132,7 @@ def record_exception(engine_name: str, exc: Exception) -> None:
        del framerecords


def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
    framerecords = list(reversed(inspect.stack()[1:]))
    try:
        error_context = get_error_context(framerecords, None, log_message, log_parameters or ())
+156 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: AGPL-3.0-or-later

import decimal
import threading

from searx import logger


__all__ = ["Histogram", "HistogramStorage", "CounterStorage"]

logger = logger.getChild('searx.metrics')


class Histogram:

    _slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width'

    def __init__(self, width=10, size=200):
        self._lock = threading.Lock()
        self._width = width
        self._size = size
        self._quartiles = [0] * size
        self._count = 0
        self._sum = 0

    def observe(self, value):
        q = int(value / self._width)
        if q < 0:
            """Value below zero is ignored"""
            q = 0
        if q >= self._size:
            """Value above the maximum is replaced by the maximum"""
            q = self._size - 1
        with self._lock:
            self._quartiles[q] += 1
            self._count += 1
            self._sum += value

    @property
    def quartiles(self):
        return list(self._quartiles)

    @property
    def count(self):
        return self._count

    @property
    def sum(self):
        return self._sum

    @property
    def average(self):
        with self._lock:
            if self._count != 0:
                return self._sum / self._count
            else:
                return 0

    @property
    def quartile_percentage(self):
        ''' Quartile in percentage '''
        with self._lock:
            if self._count > 0:
                return [int(q * 100 / self._count) for q in self._quartiles]
            else:
                return self._quartiles

    @property
    def quartile_percentage_map(self):
        result = {}
        # use Decimal to avoid rounding errors
        x = decimal.Decimal(0)
        width = decimal.Decimal(self._width)
        width_exponent = -width.as_tuple().exponent
        with self._lock:
            if self._count > 0:
                for y in self._quartiles:
                    yp = int(y * 100 / self._count)
                    if yp != 0:
                        result[round(float(x), width_exponent)] = yp
                    x += width
        return result

    def percentage(self, percentage):
        # use Decimal to avoid rounding errors
        x = decimal.Decimal(0)
        width = decimal.Decimal(self._width)
        stop_at_value = decimal.Decimal(self._count) / 100 * percentage
        sum_value = 0
        with self._lock:
            if self._count > 0:
                for y in self._quartiles:
                    sum_value += y
                    if sum_value >= stop_at_value:
                        return x
                    x += width
        return None

    def __repr__(self):
        return "Histogram<avg: " + str(self.average) + ", count: " + str(self._count) + ">"


class HistogramStorage:

    __slots__ = 'measures'

    def __init__(self):
        self.clear()

    def clear(self):
        self.measures = {}

    def configure(self, width, size, *args):
        measure = Histogram(width, size)
        self.measures[args] = measure
        return measure

    def get(self, *args):
        return self.measures.get(args, None)

    def dump(self):
        logger.debug("Histograms:")
        ks = sorted(self.measures.keys(), key='/'.join)
        for k in ks:
            logger.debug("- %-60s %s", '|'.join(k), self.measures[k])


class CounterStorage:

    __slots__ = 'counters', 'lock'

    def __init__(self):
        self.lock = threading.Lock()
        self.clear()

    def clear(self):
        with self.lock:
            self.counters = {}

    def configure(self, *args):
        with self.lock:
            self.counters[args] = 0

    def get(self, *args):
        return self.counters[args]

    def add(self, value, *args):
        with self.lock:
            self.counters[args] += value

    def dump(self):
        with self.lock:
            ks = sorted(self.counters.keys(), key='/'.join)
        logger.debug("Counters:")
        for k in ks:
            logger.debug("- %-60s %s", '|'.join(k), self.counters[k])

searx/metrology/__init__.py

deleted100644 → 0
+0 −0

Empty file deleted.

Loading