From 7721632c7fe154e90b7bee70ebe5a129d26000e1 Mon Sep 17 00:00:00 2001
From: Nicolas Gelot
Date: Thu, 11 Apr 2019 23:43:04 +0200
Subject: [PATCH] Add cache interface

In order to use another cache tool or to use spot without a cache system.
---
 searx/__init__.py        |   1 +
 searx/search.py          |  40 +++++-----
 searx/search_database.py | 158 ++++++++++++++++++++++-----------------
 searx/settings.yml       |   1 +
 searx/webapp.py          |  15 ++--
 5 files changed, 120 insertions(+), 95 deletions(-)

diff --git a/searx/__init__.py b/searx/__init__.py
index 9dc594431..7324c1e4b 100644
--- a/searx/__init__.py
+++ b/searx/__init__.py
@@ -89,6 +89,7 @@ if 'BASE_URL' in environ:
 if 'IMAGE_PROXY' in environ:
     settings['server']['image_proxy'] = environ['IMAGE_PROXY']
 if 'SEARX_REDIS_HOST' in environ:
+    settings['redis']['enable'] = True
     settings['redis']['host'] = environ['SEARX_REDIS_HOST']
 if 'HTTP_PROXY_URL' in environ:
     settings['proxies']['http'] = environ['HTTP_PROXY_URL']
diff --git a/searx/search.py b/searx/search.py
index 118eddf1f..b208908c5 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -185,27 +185,25 @@ def default_request_params():
     }


-def search(request):
-    """ Entry point to perform search request on engines
-    """
-    search = Search()
-    search_query = search.get_search_query_from_webapp(request.preferences, request.form)
-    searchData = search_database.read(search_query)
-    if searchData is None:
-        result_container = search.search(search_query)
-        searchData = search.create_search_data(search_query, result_container)
-        threading.Thread(
-            target=search_database.save,
-            args=(searchData,),
-            name='save_search_' + str(searchData)
-        ).start()
-
-    search.search_with_plugins(request, searchData)
-    return searchData
-
-
-class Search(object):
-    """Search information container"""
+class Search:
+    """Search information manager"""
+
+    def __init__(self, cachecls=search_database.CacheInterface):
+        self.cache = cachecls()
+
+    def __call__(self, request):
+        """ Entry point to perform search request on engines
+        """
+        search_query = self.get_search_query_from_webapp(request.preferences, request.form)
+        searchData = self.cache.read(search_query)
+
+        if searchData is None:
+            result_container = self.search(search_query)
+            searchData = self.create_search_data(search_query, result_container)
+            self.cache.save(searchData)
+
+        self.search_with_plugins(request, searchData)
+        return searchData

     def search(self, search_query):
         """ do search-request
diff --git a/searx/search_database.py b/searx/search_database.py
index 29da205f4..e50fbde24 100644
--- a/searx/search_database.py
+++ b/searx/search_database.py
@@ -1,3 +1,4 @@
+import threading
 import redis
 import pickle

@@ -5,81 +6,104 @@ from searx import settings
 from searx.query import SearchQuery


-def make_key(q):
-    if q.time_range is None:
-        q.time_range = ""
+class CacheInterface:
+    """ Cache interface to store SearchData object
+    """
+    def read(self, q):
+        pass

-    return "SEARCH_HISTORY:{}:{}:{}:{}:{}:{}:{}".format(
-        q.query,
-        q.engines,
-        q.categories[0],
-        q.language,
-        q.safesearch,
-        q.pageno,
-        q.time_range,
-    )
+    def save(self, d):
+        pass

+    def update(self, d):
+        pass

-def _get_connection():
-    return redis.Redis(host=settings['redis']['host'])
+    def get_twenty_queries(self, x):
+        return []


-def read(q):
-    conn = _get_connection()
-    key = make_key(q)
-    response = conn.get(key)
-    if not response:
-        return None
-    return pickle.loads(response)
+class RedisCache(CacheInterface):
+    def __init__(self):
+        self.pool = redis.ConnectionPool(host=settings['redis']['host'])
+        self.running = threading.Event()

+    def make_key(self, q):
+        if q.time_range is None:
+            q.time_range = ""

-def save(d):
-    conn = _get_connection()
-    key = make_key(d)
-    history = conn.incr("SEARCH_HISTORY_INDEX")
-    conn.zadd("SEARCH_HISTORY_KEYS", {key: history})
-    conn.set(key, pickle.dumps(d, protocol=4))
-
+        return "SEARCH_HISTORY:{}:{}:{}:{}:{}:{}:{}".format(
+            q.query,
+            q.engines,
+            q.categories[0],
+            q.language,
+            q.safesearch,
+            q.pageno,
+            q.time_range,
+        )

-def get_twenty_queries(x):
-    result = []
+    def _get_connection(self):
+        return redis.Redis(connection_pool=self.pool)
+
+    def read(self, q):
+        conn = self._get_connection()
+        key = self.make_key(q)
+        response = conn.get(key)
+        if not response:
+            return None
+        return pickle.loads(response)
+
+    def _save(self, d):
+        conn = self._get_connection()
+        key = self.make_key(d)
+        history = conn.incr("SEARCH_HISTORY_INDEX")
+        conn.zadd("SEARCH_HISTORY_KEYS", {key: history})
+        conn.set(key, pickle.dumps(d, protocol=4))
+
+    def save(self, d):
+        threading.Thread(
+            target=self._save,
+            args=(d,),
+            name='save_search_' + str(d)
+        ).start()
+
+    def get_twenty_queries(self, x):
+        result = []
+
+        conn = self._get_connection()
+        keys = conn.zrange('SEARCH_HISTORY_KEYS', int(x), int(x) + 20)
+        if not keys:
+            return result
+
+        pipe = conn.pipeline()
+        for key in keys:
+            pipe.get(key)
+        output = pipe.execute()
+        for row in output:
+            row = pickle.loads(row)
+            result.append(
+                SearchQuery(
+                    row.query,
+                    row.engines,
+                    row.categories,
+                    row.language,
+                    row.safesearch,
+                    row.pageno,
+                    row.time_range,
+                )
+            )

-    conn = _get_connection()
-    keys = conn.zrange('SEARCH_HISTORY_KEYS', int(x), int(x) + 20)
-    if not keys:
         return result

-    pipe = conn.pipeline()
-    for key in keys:
-        pipe.get(key)
-    output = pipe.execute()
-    for row in output:
-        row = pickle.loads(row)
-        result.append(
-            SearchQuery(
-                row.query,
-                row.engines,
-                row.categories,
-                row.language,
-                row.safesearch,
-                row.pageno,
-                row.time_range,
-            )
-        )
-
-    return result
-
-
-def update(d):
-    conn = _get_connection()
-    key = make_key(d)
-    current = read(d)
-    current.results = d.results
-    current.paging = d.paging
-    current.results_number = d.results_number
-    current.answers = d.answers
-    current.corrections = d.corrections
-    current.infoboxes = d.infoboxes
-    current.suggestions = d.suggestions
-    current.unresponsive_engines = d.unresponsive_engines
-    conn.set(key, pickle.dumps(current, protocol=4))
+    def update(self, d):
+        conn = self._get_connection()
+        key = self.make_key(d)
+        current = self.read(d)
+        current.results = d.results
+        current.paging = d.paging
+        current.results_number = d.results_number
+        current.answers = d.answers
+        current.corrections = d.corrections
+        current.infoboxes = d.infoboxes
+        current.suggestions = d.suggestions
+        current.unresponsive_engines = d.unresponsive_engines
+        conn.set(key, pickle.dumps(current, protocol=4))
diff --git a/searx/settings.yml b/searx/settings.yml
index 929da6680..d511fa320 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -17,6 +17,7 @@ server:
     http_protocol_version : "1.0" # 1.0 and 1.1 are supported

 redis:
+    enable: False
     host : "127.0.0.1"
     user : "searx"
     password : "password" # change this!
diff --git a/searx/webapp.py b/searx/webapp.py
index f6f9c0063..84f9a504e 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -33,7 +33,7 @@ import copy

 import requests

-from searx import logger, search_database
+from searx import logger

 logger = logger.getChild('webapp')

@@ -68,7 +68,8 @@ from searx.utils import (
 )
 from searx.version import VERSION_STRING
 from searx.languages import language_codes as languages
-from searx.search import Search, search
+from searx.search import Search
+from searx.search_database import RedisCache
 from searx.query import RawTextQuery
 from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins
@@ -77,7 +78,6 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
 from searx.url_utils import urlencode, urlparse, urljoin
 from searx.utils import new_hmac
-from searx.search_database import get_twenty_queries
 import threading

 # check if the pyopenssl package is installed.
@@ -131,6 +131,8 @@ if not searx_debug \

 babel = Babel(app)

+search = Search(RedisCache) if settings["redis"]["enable"] else Search()
+
 rtl_locales = ['ar', 'arc', 'bcc', 'bqi', 'ckb', 'dv', 'fa', 'glk', 'he',
                'ku', 'mzn', 'pnb', 'ps', 'sd', 'ug', 'ur', 'yi']

@@ -842,18 +844,17 @@ def wait_updating(start_time):


 def update_results():
-    search = Search()
     start_time = time.time()
     x = 0
     while not running.is_set():
-        queries = get_twenty_queries(x)
+        queries = search.cache.get_twenty_queries(x)
         for query in queries:
            result_container = search.search(query)
            searchData = search.create_search_data(query, result_container)
-            search_database.update(searchData)
+            search.cache.update(searchData)
            if running.is_set():
                return
-        x += 20
+        x += len(queries)
         if len(queries) < 20:
             x = 0
         wait_updating(start_time)
--
GitLab
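Usage sketch (illustrative only, not part of the patch): with this change, webapp.py selects the backend once at startup, and any class implementing CacheInterface's read/save/update/get_twenty_queries contract can be passed to Search as cachecls. The DictCache class below is a hypothetical in-memory backend written to show that contract; only RedisCache and the no-op CacheInterface actually ship with this patch.

# Illustrative example only -- not part of the patch. Assumes SearchData
# objects pickle cleanly, which RedisCache already relies on.
import pickle

from searx.search import Search
from searx.search_database import CacheInterface


class DictCache(CacheInterface):
    """Hypothetical in-memory backend implementing the RedisCache contract."""

    def __init__(self):
        self._store = {}

    def _key(self, q):
        # Same identity fields RedisCache puts in its Redis key
        # (engines stringified so the tuple is hashable).
        return (q.query, str(q.engines), q.categories[0], q.language,
                q.safesearch, q.pageno, q.time_range or "")

    def read(self, q):
        data = self._store.get(self._key(q))
        return pickle.loads(data) if data is not None else None

    def save(self, d):
        self._store[self._key(d)] = pickle.dumps(d, protocol=4)

    def update(self, d):
        self.save(d)

    def get_twenty_queries(self, x):
        # A real backend would page through its stored keys here; returning an
        # empty list simply keeps webapp.update_results() idle, exactly like
        # the default CacheInterface.
        return []


# Wiring in the style of webapp.py: the class is passed, Search instantiates it.
search = Search(DictCache)   # cached search
search_uncached = Search()   # default CacheInterface: read() returns None, so every request hits the engines
# searchData = search(flask_request)  # Search.__call__ does the lookup, engine search and save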