From ebd5dee0262db73b6a56c028ac9e88b832e2646f Mon Sep 17 00:00:00 2001 From: Nicolas Gelot Date: Tue, 4 Feb 2020 23:50:21 +0100 Subject: [PATCH] Add cache feature on valid http requests A Redis LRU cache is added on http requests; only responses with a valid status code are cached, for 1 day. Close: #50 --- README.md | 10 +++++++--- docker-compose.yml | 10 ++++++++++ requirements.txt | 2 ++ searx/__init__.py | 2 ++ searx/cache.py | 43 +++++++++++++++++++++++++++++++++++++++++++ searx/poolrequests.py | 26 +++++++++++++++++++++----- 6 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 searx/cache.py diff --git a/README.md b/README.md index d07876536..955629f40 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,13 @@ Spot was forked from searx: read [documentation](https://asciimoo.github.io/sear ## Changes between Spot and Searx * eelo theme +* redis cache on http requests (TTL 1 day) * docker packaging thinking to be production ready * better locale support ## Architecture -6 services are used for production: +7 services are used for production: * [traefik](https://docs.traefik.io/) as edge router to publish services. * [filtron](https://github.com/asciimoo/filtron) as reverse HTTP proxy to filter requests by different rules. @@ -22,6 +23,7 @@ Spot was forked from searx: read [documentation](https://asciimoo.github.io/sear * [nginx](https://www.nginx.com/) as http server to serve static files. * Spot the meta search engine. * [tor](https://www.torproject.org) as open network that helps you defend against traffic analysis. +* [redis](https://redis.io/) as memory storage to cache http requests ```mermaid @@ -35,6 +37,7 @@ graph TD E --> H(tor1) E --> I(tor2) E --> J(torN) + E --> |cache| K(redis) ``` ## Getting Started @@ -63,8 +66,9 @@ You can directly run spot, with a python command inside a docker container which contains all dependencies. 
``` -docker run -it --rm -v $(pwd):/ws -w /ws registry.gitlab.e.foundation:5000/e/cloud/my-spot/env sh -SEARX_DEBUG=1 python -X dev searx/webapp.py +docker-compose up -d redis +docker run -it --rm -v $(pwd):/ws -w /ws --network=my-spot_default registry.gitlab.e.foundation:5000/e/cloud/my-spot/env sh +PYTHONPATH=$(pwd) SEARX_REDIS_HOST=redis SEARX_DEBUG=1 python -X dev searx/webapp.py ``` Then, open your browser and navigate to the container IP. diff --git a/docker-compose.yml b/docker-compose.yml index 68d3b1a36..42e8b2a56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,15 @@ x-logging: driver: json-file services: + redis: + image: redis:5.0.7-alpine + logging: *default-logging + restart: unless-stopped + command: + - "redis-server" + - "--maxmemory 20G" + - "--maxmemory-policy allkeys-lru" + spot: image: ${SPOT_DOCKER_IMG}:${SPOT_DOCKER_TAG} logging: *default-logging @@ -18,6 +27,7 @@ services: SEARX_MORTY_KEY: "${SEARX_MORTY_KEY:-KHN0ZGluKT0gNWNmNzQ0Y2JlNjI4MDRjODAwZGUyMGY5ZjZlZTFmZWI1NTg2YTg5OAo=}" SEARX_PROXY_HTTP: "socks5://tor:9050" SEARX_PROXY_HTTPS: "socks5://tor:9050" + SEARX_REDIS_HOST: "redis" GUNICORN_LOGGER: 1 GUNICORN_LEVEL: INFO diff --git a/requirements.txt b/requirements.txt index ea4a5a7a0..566053afa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ pyopenssl==19.0.0 python-dateutil==2.8.0 pyyaml==5.1 requests[socks]==2.22.0 +redis==3.4.1 +ring==0.7.3 diff --git a/searx/__init__.py b/searx/__init__.py index da16e6ff9..89971127f 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -108,3 +108,5 @@ if 'SEARX_PROXY_HTTP' in environ: settings['outgoing'].setdefault('proxies', {})['http'] = environ['SEARX_PROXY_HTTP'] if 'SEARX_PROXY_HTTPS' in environ: settings['outgoing'].setdefault('proxies', {})['https'] = environ['SEARX_PROXY_HTTPS'] +if 'SEARX_REDIS_HOST' in environ: + settings['server']['redis_host'] = environ['SEARX_REDIS_HOST'] diff --git a/searx/cache.py b/searx/cache.py new file mode 100644 index 
000000000..f089bdc89
--- /dev/null
+++ b/searx/cache.py
@@ -0,0 +1,69 @@
+import functools
+import logging
+
+import redis
+import ring
+
+from ring.func import base as fbase
+from ring.func.sync import CacheUserInterface
+
+from searx import settings
+
+logger = logging.getLogger(__name__)
+
+# Decorator factory caching a request function in redis; stays None when no
+# redis host is configured.
+redis_cache = None
+
+
+class RequestCacheUserInterface(CacheUserInterface):
+    """Ring user interface that only caches responses with a status < 300."""
+
+    @fbase.interface_attrs(
+        transform_args=fbase.transform_kwargs_only, return_annotation=str)
+    def key(self, wire, **kwargs):
+        """Return the cache key of a call, ignoring its ``kwargs`` entry."""
+        # NOTE(review): presumably the variadic keyword arguments (headers,
+        # cookies, ...) arrive under "kwargs", so calls differing only by
+        # them share one cache entry -- confirm this is intended.
+        kwargs["kwargs"] = {}
+        return wire._rope.compose_key(*wire._bound_objects, **kwargs)
+
+    @fbase.interface_attrs(transform_args=fbase.transform_kwargs_only)
+    def get_or_update(self, wire, **kwargs):
+        """Return the cached result, or execute the call and cache it.
+
+        Results whose ``status_code`` is 300 or above are returned without
+        being stored. A redis error never fails the call: it is logged and
+        the cache is bypassed.
+        """
+        key = self.key(wire, **kwargs)
+        try:
+            return wire.storage.get(key)
+        except fbase.NotFound:
+            pass
+        except redis.RedisError:
+            logger.exception("redis cache read failed, bypassing cache")
+            return self.execute(wire, **kwargs)
+
+        result = self.execute(wire, **kwargs)
+        if result.status_code < 300:
+            try:
+                wire.storage.set(key, result)
+            except redis.RedisError:
+                logger.exception("redis cache write failed")
+        return result
+
+
+if "redis_host" in settings["server"]:
+    client = redis.StrictRedis(host=settings["server"]["redis_host"])
+
+    # NOTE(review): the "pickle" coder unpickles whatever redis returns, so
+    # the redis instance must only be writable by trusted services.
+    redis_cache = functools.partial(
+        ring.redis,
+        client,
+        coder="pickle",
+        user_interface=RequestCacheUserInterface,
+        expire=86400  # one day, in seconds
+    )
diff --git a/searx/poolrequests.py b/searx/poolrequests.py
index f9a9d7719..c8722e6fe 100644
--- a/searx/poolrequests.py
+++ b/searx/poolrequests.py
@@ -3,6 +3,7 @@ import requests
 from itertools import cycle
 from threading import RLock, local
 from searx import settings
+from searx.cache import redis_cache
 from time import time
 
 
@@ -128,9 +129,10 @@ def request(method, url, **kwargs):
     return response
 
 
-def get(url, **kwargs):
-    kwargs.setdefault('allow_redirects', True)
-    return request('get', url, **kwargs)
+if "redis_host" not in settings["server"]:
+    def get(url, **kwargs):
+        kwargs.setdefault('allow_redirects', True)
+        return request('get', url, **kwargs)
 
 
 def options(url, **kwargs):
@@ -143,8 +145,9 @@ def head(url, **kwargs):
     return request('head', url, **kwargs)
 
 
-def post(url, data=None, **kwargs):
-    return request('post', url, data=data, **kwargs)
+if "redis_host" not in settings["server"]:
+    def post(url, data=None, **kwargs):
+        return request('post', url, data=data, **kwargs)
 
 
 def put(url, data=None, **kwargs):
@@ -157,3 +160,20 @@ def patch(url, data=None, **kwargs):
 
 def delete(url, **kwargs):
     return request('delete', url, **kwargs)
+
+
+if "redis_host" in settings["server"]:
+    @redis_cache()
+    def get(url, **kwargs):
+        """Send a GET request through the redis cache."""
+        # presumably the cache interface hands the keyword arguments over
+        # nested under a "kwargs" entry; unwrap them when present
+        kwargs = kwargs.get("kwargs", kwargs)
+        kwargs.setdefault('allow_redirects', True)
+        return request('get', url, **kwargs)
+
+    @redis_cache()
+    def post(url, data=None, **kwargs):
+        """Send a POST request through the redis cache."""
+        kwargs = kwargs.get("kwargs", kwargs)
+        return request('post', url, data=data, **kwargs)
-- 
GitLab