[enh] rewrite and enhance metrics (7acd7ffc) · Commits · e / infra / spot

searx/engines/init.py

+0 −114

Original line number	Diff line number	Diff line
		@@ -21,7 +21,6 @@ import threading
		from os.path import realpath, dirname
		from babel.localedata import locale_identifiers
		from urllib.parse import urlparse
		from flask_babel import gettext
		from operator import itemgetter
		from searx import settings
		from searx import logger
		@@ -136,22 +135,6 @@ def load_engine(engine_data):
		setattr(engine, 'fetch_supported_languages',
		lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)))

		engine.stats = {
		'sent_search_count': 0, # sent search
		'search_count': 0, # succesful search
		'result_count': 0,
		'engine_time': 0,
		'engine_time_count': 0,
		'score_count': 0,
		'errors': 0
		}

		engine_type = getattr(engine, 'engine_type', 'online')

		if engine_type != 'offline':
		engine.stats['page_load_time'] = 0
		engine.stats['page_load_count'] = 0

		# tor related settings
		if settings['outgoing'].get('using_tor_proxy'):
		# use onion url if using tor.
		@@ -175,103 +158,6 @@ def load_engine(engine_data):
		return engine


		def to_percentage(stats, maxvalue):
		    """Add a ``'percentage'`` entry to every dict of ``stats``.

		    The percentage is the entry's ``'avg'`` relative to ``maxvalue``,
		    truncated to an integer.  A falsy ``maxvalue`` (``0``, ``None``)
		    yields ``0`` for every entry.  The dicts are updated in place and
		    the very same ``stats`` object is returned.
		    """
		    for entry in stats:
		        entry['percentage'] = int(entry['avg'] / maxvalue * 100) if maxvalue else 0
		    return stats


		# Build the per-engine statistics from the ``engine.stats`` counters.
		# Returns a list of ``(translated label, entries)`` tuples; every entry is a
		# dict with the keys 'avg', 'name' and 'percentage' (added by to_percentage).
		def get_engines_stats(preferences):
		# TODO refactor
		# one list of {'avg': ..., 'name': ...} entries per reported metric
		pageloads = []
		engine_times = []
		results = []
		scores = []
		errors = []
		scores_per_result = []

		# running maxima, used further down as the 100% reference of each metric
		max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
		for engine in engines.values():
		# engines rejected by preferences.validate_token() are left out
		if not preferences.validate_token(engine):
		continue

		# no counted search: nothing to average (also avoids dividing by zero below)
		if engine.stats['search_count'] == 0:
		continue

		# average number of results per counted search
		results_num = \
		engine.stats['result_count'] / float(engine.stats['search_count'])

		# average engine time, 0 when no timing was recorded
		if engine.stats['engine_time_count'] != 0:
		this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count']) # noqa
		else:
		this_engine_time = 0

		# score and score per result are only computed when there are results
		if results_num:
		score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
		score_per_result = score / results_num
		else:
		score = score_per_result = 0.0

		# NOTE(review): indentation is lost in this view; the page load lines below
		# are presumably all nested under this check - confirm against the real file
		if engine.engine_type != 'offline':
		load_times = 0
		if engine.stats['page_load_count'] != 0:
		load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count']) # noqa
		max_pageload = max(load_times, max_pageload)
		pageloads.append({'avg': load_times, 'name': engine.name})

		max_engine_times = max(this_engine_time, max_engine_times)
		max_results = max(results_num, max_results)
		max_score = max(score, max_score)
		max_score_per_result = max(score_per_result, max_score_per_result)
		max_errors = max(max_errors, engine.stats['errors'])

		engine_times.append({'avg': this_engine_time, 'name': engine.name})
		results.append({'avg': results_num, 'name': engine.name})
		scores.append({'avg': score, 'name': engine.name})
		# for errors, 'avg' holds the raw error counter, not an average
		errors.append({'avg': engine.stats['errors'], 'name': engine.name})
		scores_per_result.append({
		'avg': score_per_result,
		'name': engine.name
		})

		# add the 'percentage' key, relative to the maximum of each metric
		pageloads = to_percentage(pageloads, max_pageload)
		engine_times = to_percentage(engine_times, max_engine_times)
		results = to_percentage(results, max_results)
		scores = to_percentage(scores, max_score)
		scores_per_result = to_percentage(scores_per_result, max_score_per_result)
		errors = to_percentage(errors, max_errors)

		# the two time metrics are sorted ascending, all the others descending
		return [
		(
		gettext('Engine time (sec)'),
		sorted(engine_times, key=itemgetter('avg'))
		),
		(
		gettext('Page loads (sec)'),
		sorted(pageloads, key=itemgetter('avg'))
		),
		(
		gettext('Number of results'),
		sorted(results, key=itemgetter('avg'), reverse=True)
		),
		(
		gettext('Scores'),
		sorted(scores, key=itemgetter('avg'), reverse=True)
		),
		(
		gettext('Scores per result'),
		sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
		),
		(
		gettext('Errors'),
		sorted(errors, key=itemgetter('avg'), reverse=True)
		),
		]


		def load_engines(engine_list):
		global engines, engine_shortcuts
		engines.clear()

searx/metrics/init.py

0 → 100644

+206 −0

Original line number	Diff line number	Diff line
		# SPDX-License-Identifier: AGPL-3.0-or-later

		import typing
		import math
		import contextlib
		from timeit import default_timer
		from operator import itemgetter

		from searx.engines import engines
		from .models import HistogramStorage, CounterStorage
		from .error_recorder import count_error, count_exception, errors_per_engines

		# Public API of the package; count_error and count_exception are re-exported
		# from .error_recorder.
		__all__ = ["initialize",
		"get_engines_stats", "get_engine_errors",
		"histogram", "histogram_observe", "histogram_observe_time",
		"counter", "counter_inc", "counter_add",
		"count_error", "count_exception"]


		# NOTE(review): not referenced anywhere in the visible part of this module -
		# confirm where it is used
		ENDPOINTS = {'search'}


		# Module-wide storages: None until initialize() replaces them with fresh
		# HistogramStorage / CounterStorage instances.
		histogram_storage: typing.Optional[HistogramStorage] = None
		counter_storage: typing.Optional[CounterStorage] = None


		@contextlib.contextmanager
		def histogram_observe_time(*args):
		    """Context manager recording the duration of its ``with`` body.

		    ``args`` is the key of a histogram in ``histogram_storage``.  The value
		    bound by ``as`` is the ``default_timer()`` reading taken on entry.

		    The duration is observed only when the body finishes without raising:
		    an exception raised by the body propagates from the ``yield`` and the
		    lines after it are not run.

		    :raises ValueError: on exit, when no histogram is stored under
		        ``args``.  The lookup happens on entry, but the error is reported
		        only after the body has run.
		    """
		    h = histogram_storage.get(*args)
		    before = default_timer()
		    yield before
		    duration = default_timer() - before
		    if h:
		        h.observe(duration)
		    else:
		        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")


		def histogram_observe(duration, *args):
		    """Record ``duration`` in the histogram stored under the key ``args``.

		    The histogram is looked up in ``histogram_storage``; when none is
		    stored under ``args`` the lookup yields ``None`` and the call fails
		    with ``AttributeError``.
		    """
		    target = histogram_storage.get(*args)
		    target.observe(duration)


		def histogram(*args, raise_on_not_found=True):
		    """Return the histogram stored under the key ``args``.

		    :param raise_on_not_found: when true (the default) a missing histogram
		        is an error; when false ``None`` is returned instead.
		    :raises ValueError: when ``raise_on_not_found`` is true and no
		        histogram is stored under ``args``.
		    """
		    h = histogram_storage.get(*args)
		    if raise_on_not_found and h is None:
		        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")
		    return h


		def counter_inc(*args):
		    """Add one to the counter stored under the key ``args``."""
		    step = 1
		    counter_storage.add(step, *args)


		def counter_add(value, *args):
		    """Add ``value`` to the counter stored under the key ``args``."""
		    key = args
		    counter_storage.add(value, *key)


		def counter(*args):
		    """Return the current value of the counter stored under the key ``args``."""
		    current = counter_storage.get(*args)
		    return current


		def initialize(engine_names=None):
		    """
		    Initialize metrics

		    Replace the module-wide counter and histogram storages with fresh
		    ones and configure the counters and histograms of every engine.

		    ``engine_names`` is an iterable of engine names; when it is falsy
		    every key of ``engines`` is used.
		    """
		    global counter_storage, histogram_storage

		    counter_storage = CounterStorage()
		    histogram_storage = HistogramStorage()

		    names = engine_names or engines

		    # max_timeout = max of all the engine.timeout, but never less than 2;
		    # names that are not loaded engines have no timeout to contribute
		    max_timeout = max([2] + [engines[name].timeout for name in names if name in engines])

		    # histogram configuration: 0.1 wide buckets, up to 1.5 times max_timeout
		    bucket_width = 0.1
		    bucket_count = int(1.5 * max_timeout / bucket_width)

		    # engines
		    for name in names:
		        engine_key = ('engine', name)
		        # search count (sent, successful) and global counter of errors
		        for state in ('sent', 'successful', 'error'):
		            counter_storage.configure(*engine_key, 'search', 'count', state)
		        # score of the engine
		        counter_storage.configure(*engine_key, 'score')
		        # result count per requests
		        histogram_storage.configure(1, 100, *engine_key, 'result', 'count')
		        # time doing HTTP requests
		        histogram_storage.configure(bucket_width, bucket_count, *engine_key, 'time', 'http')
		        # total time
		        # .time.request and ...response times may overlap .time.http time.
		        histogram_storage.configure(bucket_width, bucket_count, *engine_key, 'time', 'total')


		def get_engine_errors(engline_list):
		    """Return the recorded error contexts of the engines in ``engline_list``.

		    The result maps each engine name found in both ``errors_per_engines``
		    and ``engline_list`` to a list of dicts, one per error context, sorted
		    by decreasing ``'percentage'``.  The percentage is the context count
		    relative to the number of sent searches (at least 1), rounded to a
		    multiple of 5.
		    """
		    errors_by_engine = {}
		    for engine_name in sorted(errors_per_engines.keys()):
		        if engine_name in engline_list:
		            contexts = errors_per_engines[engine_name]
		            # never divide by zero: count at least one sent search
		            sent = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
		            # pre-sorted by count so that the stable sort below keeps this
		            # order among contexts sharing the same percentage
		            rows = [
		                {
		                    'filename': context.filename,
		                    'function': context.function,
		                    'line_no': context.line_no,
		                    'code': context.code,
		                    'exception_classname': context.exception_classname,
		                    'log_message': context.log_message,
		                    'log_parameters': context.log_parameters,
		                    'secondary': context.secondary,
		                    'percentage': round(20 * count / sent) * 5,
		                }
		                for context, count in sorted(contexts.items(), key=itemgetter(1))
		            ]
		            rows.sort(key=itemgetter('percentage'), reverse=True)
		            errors_by_engine[engine_name] = rows
		    return errors_by_engine


		def to_percentage(stats, maxvalue):
		    """Add a ``'percentage'`` entry to every dict of ``stats``.

		    The percentage is the entry's ``'avg'`` relative to ``maxvalue``,
		    truncated to an integer.  A falsy ``maxvalue`` (``0``, ``None``)
		    yields ``0`` for every entry.  The dicts are updated in place and
		    the very same ``stats`` object is returned.
		    """
		    for entry in stats:
		        entry['percentage'] = int(entry['avg'] / maxvalue * 100) if maxvalue else 0
		    return stats


		def get_engines_stats(engine_list):
		    """Build the statistics of the engines named in ``engine_list``.

		    Returns a dict with the keys:

		    - ``'time'``: dicts with 'total', 'http', 'processing' (rounded to one
		      decimal) and 'name', sorted by increasing 'total'
		    - ``'max_time'``: the largest median total time, rounded up
		    - ``'time_total'``, ``'time_http'``: median times, sorted ascending
		    - ``'result_count'``: results per successful search, sorted descending
		    - ``'scores'``, ``'scores_per_result'``: sorted descending
		    - ``'error_count'``: error counters, sorted descending

		    Except for ``'time'`` the list entries are dicts with 'avg' and 'name';
		    ``'time_total'``, ``'time_http'``, ``'result_count'`` and
		    ``'error_count'`` entries also carry a 'percentage' key.

		    An engine is listed in ``'error_count'`` once a search was sent, and
		    in the other lists once a search was successful.
		    """
		    assert counter_storage is not None
		    assert histogram_storage is not None

		    list_time = []
		    list_time_http = []
		    list_time_total = []
		    list_result_count = []
		    list_error_count = []
		    list_scores = []
		    list_scores_per_result = []

		    # None means "no engine contributed yet"; to_percentage treats it as 0
		    max_error_count = max_http_time = max_time_total = max_result_count = None
		    for engine_name in engine_list:
		        error_count = counter('engine', engine_name, 'search', 'count', 'error')

		        if counter('engine', engine_name, 'search', 'count', 'sent') > 0:
		            list_error_count.append({'avg': error_count, 'name': engine_name})
		            max_error_count = max(error_count, max_error_count or 0)

		        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
		        if successful_count == 0:
		            continue

		        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
		        time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
		        time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
		        # percentage() returns None while a histogram has no observation;
		        # count it as zero, otherwise max(), round() and the subtraction
		        # below raise TypeError
		        if time_total is None:
		            time_total = 0
		        if time_http is None:
		            time_http = 0
		        result_count = result_count_sum / float(successful_count)

		        if result_count:
		            score = counter('engine', engine_name, 'score')
		            score_per_result = score / float(result_count_sum)
		        else:
		            score = score_per_result = 0.0

		        max_time_total = max(time_total, max_time_total or 0)
		        max_http_time = max(time_http, max_http_time or 0)
		        max_result_count = max(result_count, max_result_count or 0)

		        list_time.append({'total': round(time_total, 1),
		                          'http': round(time_http, 1),
		                          'name': engine_name,
		                          'processing': round(time_total - time_http, 1)})
		        list_time_total.append({'avg': time_total, 'name': engine_name})
		        list_time_http.append({'avg': time_http, 'name': engine_name})
		        list_result_count.append({'avg': result_count, 'name': engine_name})
		        list_scores.append({'avg': score, 'name': engine_name})
		        list_scores_per_result.append({'avg': score_per_result, 'name': engine_name})

		    list_time = sorted(list_time, key=itemgetter('total'))
		    list_time_total = sorted(to_percentage(list_time_total, max_time_total), key=itemgetter('avg'))
		    list_time_http = sorted(to_percentage(list_time_http, max_http_time), key=itemgetter('avg'))
		    list_result_count = sorted(to_percentage(list_result_count, max_result_count), key=itemgetter('avg'), reverse=True)
		    list_scores = sorted(list_scores, key=itemgetter('avg'), reverse=True)
		    list_scores_per_result = sorted(list_scores_per_result, key=itemgetter('avg'), reverse=True)
		    list_error_count = sorted(to_percentage(list_error_count, max_error_count), key=itemgetter('avg'), reverse=True)

		    return {
		        'time': list_time,
		        'max_time': math.ceil(max_time_total or 0),
		        'time_total': list_time_total,
		        'time_http': list_time_http,
		        'result_count': list_result_count,
		        'scores': list_scores,
		        'scores_per_result': list_scores_per_result,
		        'error_count': list_error_count,
		    }

searx/metrology/error_recorder.py→searx/metrics/error_recorder.py

+2 −5

Original line number	Diff line number	Diff line
		import typing
		import inspect
		import logging
		from json import JSONDecodeError
		from urllib.parse import urlparse
		from httpx import HTTPError, HTTPStatusError
		@@ -9,8 +8,6 @@ from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathExcepti
		from searx import logger


		logging.basicConfig(level=logging.INFO)

		errors_per_engines = {}


		@@ -124,7 +121,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame
		return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)


		def record_exception(engine_name: str, exc: Exception) -> None:
		def count_exception(engine_name: str, exc: Exception) -> None:
		framerecords = inspect.trace()
		try:
		exception_classname = get_exception_classname(exc)
		@@ -135,7 +132,7 @@ def record_exception(engine_name: str, exc: Exception) -> None:
		del framerecords


		def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
		def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
		framerecords = list(reversed(inspect.stack()[1:]))
		try:
		error_context = get_error_context(framerecords, None, log_message, log_parameters or ())

searx/metrics/models.py

0 → 100644

+156 −0

Original line number	Diff line number	Diff line
		# SPDX-License-Identifier: AGPL-3.0-or-later

		import decimal
		import threading

		from searx import logger


		# Public names of this module.
		__all__ = ["Histogram", "HistogramStorage", "CounterStorage"]

		# Rebinds the imported ``logger`` name to its 'searx.metrics' child logger;
		# the dump() methods below log through it.
		logger = logger.getChild('searx.metrics')


		class Histogram:
		    """Fixed-width histogram of observed values.

		    The histogram has ``size`` buckets, each ``width`` wide: bucket ``i``
		    counts the values in ``[i * width, (i + 1) * width)``.  Values outside
		    that range are counted in the first or the last bucket.

		    ``observe`` and the computed statistics take an internal lock; the
		    plain ``count``, ``sum`` and ``quartiles`` accessors read without it.
		    """

		    __slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width'

		    def __init__(self, width=10, size=200):
		        self._lock = threading.Lock()
		        self._width = width
		        self._size = size
		        self._quartiles = [0] * size
		        self._count = 0
		        self._sum = 0

		    def observe(self, value):
		        """Count ``value`` in its bucket and add it to ``count`` and ``sum``."""
		        q = int(value / self._width)
		        if q < 0:
		            # a value below zero is not dropped: it goes to the first bucket
		            q = 0
		        if q >= self._size:
		            # a value above the maximum goes to the last bucket
		            q = self._size - 1
		        with self._lock:
		            self._quartiles[q] += 1
		            self._count += 1
		            self._sum += value

		    @property
		    def quartiles(self):
		        """A copy of the bucket counters."""
		        return list(self._quartiles)

		    @property
		    def count(self):
		        """Number of observed values."""
		        return self._count

		    @property
		    def sum(self):
		        """Sum of the observed values (unclamped)."""
		        return self._sum

		    @property
		    def average(self):
		        """Mean of the observed values, ``0`` when nothing was observed."""
		        with self._lock:
		            if self._count != 0:
		                return self._sum / self._count
		            else:
		                return 0

		    @property
		    def quartile_percentage(self):
		        ''' Quartile in percentage '''
		        with self._lock:
		            if self._count > 0:
		                return [int(q * 100 / self._count) for q in self._quartiles]
		            else:
		                # a copy: callers must not get hold of the internal list
		                return list(self._quartiles)

		    @property
		    def quartile_percentage_map(self):
		        """Map the lower bound of each bucket to its share in percent.

		        Buckets whose truncated share is 0 are left out; the result is
		        empty when nothing was observed.
		        """
		        result = {}
		        # use Decimal to avoid rounding errors; go through str() because
		        # Decimal(0.1) would carry the binary representation error along
		        x = decimal.Decimal(0)
		        width = decimal.Decimal(str(self._width))
		        width_exponent = -width.as_tuple().exponent
		        with self._lock:
		            if self._count > 0:
		                for y in self._quartiles:
		                    yp = int(y * 100 / self._count)
		                    if yp != 0:
		                        result[round(float(x), width_exponent)] = yp
		                    x += width
		        return result

		    def percentage(self, percentage):
		        """Return the lower bound of the bucket holding the given percentile.

		        The buckets are accumulated from the first one until
		        ``percentage`` percent of the observations are covered.  The
		        result is a ``decimal.Decimal``, or ``None`` when nothing was
		        observed or when the threshold is never reached (``percentage``
		        above 100).
		        """
		        # use Decimal to avoid rounding errors; go through str() because
		        # Decimal(0.1) would carry the binary representation error along
		        x = decimal.Decimal(0)
		        width = decimal.Decimal(str(self._width))
		        sum_value = 0
		        with self._lock:
		            if self._count > 0:
		                # computed under the lock so that the threshold and the
		                # buckets belong to the same snapshot
		                stop_at_value = decimal.Decimal(self._count) / 100 * decimal.Decimal(str(percentage))
		                for y in self._quartiles:
		                    sum_value += y
		                    if sum_value >= stop_at_value:
		                        return x
		                    x += width
		        return None

		    def __repr__(self):
		        return "Histogram<avg: " + str(self.average) + ", count: " + str(self._count) + ">"


		class HistogramStorage:
		    """Registry of ``Histogram`` objects, keyed by a tuple of names."""

		    __slots__ = ('measures',)

		    def __init__(self):
		        self.clear()

		    def clear(self):
		        """Forget every configured histogram."""
		        self.measures = {}

		    def configure(self, width, size, *args):
		        """Create, store under the key ``args`` and return a new histogram.

		        A histogram already stored under ``args`` is replaced.
		        """
		        measure = Histogram(width, size)
		        self.measures[args] = measure
		        return measure

		    def get(self, *args):
		        """Return the histogram stored under ``args``, ``None`` if missing."""
		        return self.measures.get(args, None)

		    def dump(self):
		        """Log every histogram at debug level, sorted by key."""
		        logger.debug("Histograms:")
		        ks = sorted(self.measures.keys(), key='/'.join)
		        for k in ks:
		            # plain '|' separator: '\|' is an invalid escape sequence
		            logger.debug("- %-60s %s", '|'.join(k), self.measures[k])


		class CounterStorage:
		    """Registry of integer counters, keyed by a tuple of names.

		    Writes (``clear``, ``configure``, ``add``) and ``dump`` take the
		    internal lock; ``get`` reads without it.
		    """

		    __slots__ = 'counters', 'lock'

		    def __init__(self):
		        self.lock = threading.Lock()
		        self.clear()

		    def clear(self):
		        """Forget every configured counter."""
		        with self.lock:
		            self.counters = {}

		    def configure(self, *args):
		        """Create the counter ``args``, or reset it to 0 if it exists."""
		        with self.lock:
		            self.counters[args] = 0

		    def get(self, *args):
		        """Return the value of the counter ``args``.

		        :raises KeyError: when the counter was not configured.
		        """
		        return self.counters[args]

		    def add(self, value, *args):
		        """Add ``value`` to the counter ``args``.

		        :raises KeyError: when the counter was not configured.
		        """
		        with self.lock:
		            self.counters[args] += value

		    def dump(self):
		        """Log every counter at debug level, sorted by key."""
		        with self.lock:
		            ks = sorted(self.counters.keys(), key='/'.join)
		            logger.debug("Counters:")
		            for k in ks:
		                # plain '|' separator: '\|' is an invalid escape sequence
		                logger.debug("- %-60s %s", '|'.join(k), self.counters[k])

searx/metrology/init.py

deleted100644 → 0

+0 −0

Empty file deleted.