diff --git a/.env b/.env index f7eba7e87e06772171c8353f887ea28172470399..45e8d6fbcd5f747a56d0d8b6ea6f19510200c39b 100644 --- a/.env +++ b/.env @@ -1,11 +1,8 @@ SPOT_HOSTNAME=spot.ecloud.global SPOT_DOCKER_TAG=latest -SEARX_MORTY_URL=https://localhost:8089 SEARX_SECRET=":@)%NN0+OqNdy:{prWQlZ{p9|oO9p-UyJq@%V!~G:arrSx6fXz.{jd%=XF44ncj" SEARX_REDIS_HOST=redis SEARX_UI_DEFAULT_THEME=etheme -SEARX_PROXY_HTTP=socks5h://tor-socks-proxy:9150 -SEARX_PROXY_HTTPS=socks5h://tor-socks-proxy:9150 -FILTRON_PORT=8088 +NGINX_PORT=8088 NODE_NAME=local CI_REGISTRY_IMAGE=registry.gitlab.e.foundation/e/infra/spot diff --git a/.env.prod b/.env.prod index 9ed4a2762759d7736e1ce0de6bc4a418cc50a5a7..4c3a34925f012eb6773f37d032ac84c24ca7dd1a 100644 --- a/.env.prod +++ b/.env.prod @@ -5,8 +5,8 @@ SEARX_REDIS_HOST=redis SEARX_UI_DEFAULT_THEME=etheme SEARX_PROXY_HTTP=http://proxy01.ecloud.global:1099 SEARX_PROXY_HTTPS=http://proxy01.ecloud.global:1099 -WIREGUARD_IP=127.0.0.1 -FILTRON_PORT=8088 +WIREGUARD_IP=127.0.0.1 +NGINX_PORT=8088 INTERNAL_NETWORK_NAME=default NODE_NAME=spot11 CI_REGISTRY_IMAGE=image diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 68e5f0e73d76fadb8ff300aa16e33701f746d563..036d4e0e234f51a917d7c559a51adf0584971bd9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,8 +6,8 @@ stages: - test - deploy -services: - - docker:20.10-dind +services: + - docker:20.10-dind python: stage: check @@ -64,12 +64,12 @@ test:unit: - chmod 644 ~/.ssh/known_hosts ~/.ssh/id_ed25519.pub - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY script: - - echo "Deploying to ($SSH_USER@$BACKEND_HOST)" - - ssh $SSH_USER@$BACKEND_HOST "cd $DEPLOYMENT_PATH && git stash && git pull && git checkout $BRANCH && rm .env .env.prod && rm docker-compose.yml && mv docker-compose.prod.yml docker-compose.yml" + - echo "Deploying to ($SSH_USER@$BACKEND_HOST)" + - ssh $SSH_USER@$BACKEND_HOST "cd $DEPLOYMENT_PATH && git stash && git fetch && git checkout $BRANCH && git reset --hard origin/$BRANCH && rm .env .env.prod && rm docker-compose.yml && mv docker-compose.prod.yml docker-compose.yml" - scp $ENV_FILE $SSH_USER@$BACKEND_HOST:$DEPLOYMENT_PATH/.env - ssh $SSH_USER@$BACKEND_HOST "cd $DEPLOYMENT_PATH - && sed -i 's/SPOT_DOCKER_TAG=master/SPOT_DOCKER_TAG=$SPOT_DOCKER_TAG/g' .env - && docker-compose pull && docker-compose stop filtron && sleep 2 && docker-compose up -d && docker-compose restart spot-nginx" + && sed -i 's/SPOT_DOCKER_TAG=master/SPOT_DOCKER_TAG=$SPOT_DOCKER_TAG/g' .env + && docker-compose pull && sleep 2 && docker-compose up -d && docker-compose restart spot-nginx" deploy:spot.murenatest.io.backend1: extends: .deploy:template diff --git a/README.md b/README.md index 24cd0e0c3c4fbfc3de93385383500cfb21218e44..bff205f5f762394cc3e80ef24be446284c16f77e 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,8 @@ Spot was forked from searx: read [documentation](https://asciimoo.github.io/sear ## Architecture -6 services are used for production: +3 services are used for production: -* [filtron](https://github.com/asciimoo/filtron) as reverse HTTP proxy to filter requests by different rules. * [nginx](https://www.nginx.com/) as http server to serve static files. * Spot the meta search engine. * [redis](https://redis.io/) as memory storage to cache http requests @@ -24,8 +23,7 @@ Spot was forked from searx: read [documentation](https://asciimoo.github.io/sear ```mermaid graph TD - A(reverse proxy) --> |http://localhost:8088| B(filtron) - B --> C(nginx) + A(reverse proxy) --> |http://localhost:8088| C(nginx) C --> |static file| C C --> |API| D(spot) D --> E(proxy service) @@ -56,10 +54,10 @@ below to run spot for production or local environment. Run the docker-compose to start the project ``` -docker-compose up -d +docker-compose up -d --build ``` -Then go to http://localhost:8088. +Then go to http://localhost:8100. ### For developer diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 325f0229b0ed54e4108728833cf0012148ab5ab6..98daf2a76f7869025c9da1d9b261498026937e96 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -26,6 +26,7 @@ services: SEARX_PROXY_HTTP: "${SEARX_PROXY_HTTP}" SEARX_PROXY_HTTPS: "${SEARX_PROXY_HTTPS}" SEARX_REDIS_HOST: "${SEARX_REDIS_HOST}" + SEARXNG_REDIS_URL: "redis://${SEARX_REDIS_HOST}:6379" SEARX_UI_DEFAULT_THEME: "${SEARX_UI_DEFAULT_THEME}" GUNICORN_LOGGER: 1 GUNICORN_LEVEL: INFO @@ -43,6 +44,8 @@ services: tag: docker-${ENVIRONMENT_NAME}-spot-nginx networks: - ${INTERNAL_NETWORK_NAME} + ports: + - ${WIREGUARD_IP}:${NGINX_PORT}:80 volumes: - ./etc/nginx/conf.d/spot.conf:/etc/nginx/conf.d/default.conf - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf @@ -51,18 +54,6 @@ services: labels: - "com.centurylinklabs.watchtower.scope=staging-spot" - filtron: - image: dalf/filtron:latest - restart: always - command: -listen :3000 -rules /etc/filtron/rules.json -target spot-nginx - networks: - - ${INTERNAL_NETWORK_NAME} - - spot-wireguarded - ports: - - ${WIREGUARD_IP}:${FILTRON_PORT}:3000 - volumes: - - ./etc/filtron/rules.json:/etc/filtron/rules.json - watchtower: image: containrrr/watchtower volumes: diff --git a/docker-compose.yml b/docker-compose.yml index 593f6d8d7618d7aae36380f5d8c1b53b41dce04f..3b0a15e82011196e3c96e1516f485a9158f4b2d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: image: redis:5-alpine logging: *default-logging restart: unless-stopped - command: redis-server --maxmemory 8G --maxmemory-policy allkeys-lru --requirepass foobar + command: redis-server --maxmemory 8G --maxmemory-policy allkeys-lru spot: image: ${CI_REGISTRY_IMAGE}:${SPOT_DOCKER_TAG} @@ -20,18 +20,24 @@ services: context: . dockerfile: Dockerfile logging: *default-logging - ports: - - 8100:8080 restart: unless-stopped environment: SEARX_SECRET: "${SEARX_SECRET}" - SEARX_PROXY_HTTP: "${SEARX_PROXY_HTTP}" - SEARX_PROXY_HTTPS: "${SEARX_PROXY_HTTPS}" SEARX_REDIS_HOST: "${SEARX_REDIS_HOST}" + SEARXNG_REDIS_URL: "redis://${SEARX_REDIS_HOST}:6379" SEARX_UI_DEFAULT_THEME: "${SEARX_UI_DEFAULT_THEME}" GUNICORN_LOGGER: 1 GUNICORN_LEVEL: INFO NODE_NAME: ${NODE_NAME} - + nginx: + image: nginx:1.23 + restart: always + ports: + - 8100:80 + volumes: + - ./etc/nginx/conf.d/spot.conf:/etc/nginx/conf.d/default.conf + - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf + - ./etc/nginx/proxy_spot_params:/etc/nginx/proxy_spot_params + - ./searx/static:/var/www/spot/static diff --git a/etc/filtron/rules.json b/etc/filtron/rules.json deleted file mode 100644 index 9752d7419ea24bde811b19d20a8cb06dd242a25f..0000000000000000000000000000000000000000 --- a/etc/filtron/rules.json +++ /dev/null @@ -1,118 +0,0 @@ -[ - { - "name": "searx.space", - "filters": ["Header:X-Forwarded-For=nslookup(check.searx.space)"], - "stop": true, - "actions": [{ "name": "log"}] - }, - { - "name": "IP limit, all paths except image proxy", - "filters": ["Param:url", "Path=^(!image_proxy)$"], - "interval": 3, - "limit": 25, - "aggregations": ["Header:X-Forwarded-For"], - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 9001, try again later."}} - ] - }, - { - "name": "useragent limit, all paths except image_proxy", - "filters": ["Param:url", "Path=^(!image_proxy)$"], - "interval": 30, - "limit": 200, - "aggregations": ["Header:X-Forwarded-For", "Header:User-Agent"], - "stop": true, - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 9002, try again later."}} - ] - }, - { - "name": "search request", - "filters": ["Param:q", "Path=^(/|/search)$"], - "subrules": [ - { - "name": "allow Firefox Android (issue #48 and #60)", - "filters": [ - "Param:q=^1$", - "Header:User-Agent=(^MozacFetch/[0-9]{2,3}.[0-9].[0-9]+$|^Mozilla/5.0 \\(Android [0-9]{1,2}(.[0-9]{1,2}.[0-9]{1,2})?; Mobile; rv:[0-9]{2,3}.[0-9]\\) Gecko/[0-9]{2,3}.[0-9] Firefox/[0-9]{2,3}.[0-9]$)" - ], - "stop": true, - "actions": [{"name": "log"}] - }, - { - "name": "robot agent forbidden", - "limit": 0, - "stop": true, - "filters": ["Header:User-Agent=([Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser)"], - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10001"}} - ] - }, - { - "name": "bot forbidden", - "limit": 0, - "stop": true, - "filters": ["Header:User-Agent=(Googlebot|GoogleImageProxy|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot|ZmEu|BLEXBot|bitlybot)"], - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10002"}} - ] - }, - { - "name": "block missing accept-language", - "filters": ["!Header:Accept-Language"], - "limit": 0, - "stop": true, - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10003"}} - ] - }, - { - "name": "block Connection:close", - "filters": ["Header:Connection=close"], - "limit": 0, - "stop": true, - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10004"}} - ] - }, - { - "name": "block no gzip support", - "filters": ["!Header:Accept-Encoding=(^gzip$|^gzip[;,]|[; ]gzip$|[; ]gzip[;,])"], - "limit": 0, - "stop": true, - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10005"}} - ] - }, - { - "name": "block no deflate support", - "filters": ["!Header:Accept-Encoding=(^deflate$|^deflate[;,]|[; ]deflate$|[; ]deflate[;,])"], - "limit": 0, - "stop": true, - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10006"}} - ] - }, - { - "name": "rss/json limit", - "interval": 3, - "limit": 10, - "stop": true, - "filters": ["Param:format=(csv|json|rss)"], - "aggregations": ["Header:X-Forwarded-For"], - "actions": [ - {"name": "block", - "params": {"message": "Rate limit exceeded 10007, try again later."}} - ] - } - ] - } -] diff --git a/etc/nginx/proxy_spot_params b/etc/nginx/proxy_spot_params index 6f0fde43b4a719b6ceea1526efeb3844feff9769..418bae0bb8491135880f5f3e3f3816b52d194aec 100644 --- a/etc/nginx/proxy_spot_params +++ b/etc/nginx/proxy_spot_params @@ -1,8 +1,10 @@ proxy_pass http://spot:8080; proxy_set_header Host $http_host; +proxy_set_header Connection $http_connection; +proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto https; proxy_buffering on; proxy_buffer_size 128k; proxy_buffers 4 256k; -proxy_busy_buffers_size 256k; \ No newline at end of file +proxy_busy_buffers_size 256k; diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py index c0ce575e09b1cd42233a24ab2a4915605f81596d..098dc75465bdc8691ba9687960dec3198af7cb4b 100644 --- a/searx/plugins/limiter.py +++ b/searx/plugins/limiter.py @@ -79,7 +79,15 @@ def is_accepted_request() -> bool: logger.debug("BLOCK %s: Accept-Encoding misses text/html", x_forwarded_for) return False - if request.args.get("format", "html") != "html": + if request.args.get("format", "html") == "json" and request.args.get("categories", "general") in [ + "images", + "videos", + ]: + c = incr_sliding_window(redis_client, "API media limit" + x_forwarded_for, 60) + if c > 10: + logger.debug("BLOCK %s: API media limit exceeded", x_forwarded_for) + return False + elif request.args.get("format", "html") != "html": c = incr_sliding_window(redis_client, "API limit" + x_forwarded_for, 3600) if c > 4: logger.debug("BLOCK %s: API limit exceeded", x_forwarded_for) diff --git a/searx/plugins/rest_api.py b/searx/plugins/rest_api.py deleted file mode 100644 index ceb8d6d8b3e8ff3bba0f0c59e0bccc11ab059d59..0000000000000000000000000000000000000000 --- a/searx/plugins/rest_api.py +++ /dev/null @@ -1,52 +0,0 @@ -import hmac -import hashlib -from urllib.parse import urlencode -from flask_babel import gettext -from searx import settings - - -name = gettext("Rest API") -description = gettext("Update REST API") -default_on = True -preference_section = "general" - - -def proxify(url): - """helper copied from webapp module""" - if url.startswith("//"): - url = "https:" + url - - if not settings.get("result_proxy"): - return url - - if url.startswith("data:image/"): - # 50 is an arbitrary number to get only the beginning of the image. - partial_base64 = url[len("data:image/") : 50].split(";") - if ( - len(partial_base64) == 2 - and partial_base64[0] in ["gif", "png", "jpeg", "pjpeg", "webp", "tiff", "bmp"] - and partial_base64[1].startswith("base64,") - ): - return url - else: - return None - - url_params = dict(mortyurl=url.encode("utf-8")) - - if settings["result_proxy"].get("key"): - url_params["mortyhash"] = hmac.new( - settings["result_proxy"]["key"], url.encode("utf-8"), hashlib.sha256 - ).hexdigest() - - return "{0}?{1}".format(settings["result_proxy"]["url"], urlencode(url_params)) - - -def on_result(request, search, result): - if request.form.get("format") != "json": - return True - - for attr in ["thumbnail", "thumbnail_src"]: - if attr in result: - result[attr] = proxify(result[attr]) - - return True diff --git a/searx/settings.yml b/searx/settings.yml index 05375f6273894cff5f687d4c4c1a5c0ed204a2b3..ab0b6280525a2fde3d640965d4ddb81cd8395bfe 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -35,6 +35,7 @@ server: X-Download-Options : noopen X-Robots-Tag : noindex, nofollow Referrer-Policy : no-referrer + limiter: true redis: # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. @@ -1287,7 +1288,6 @@ engines: timeout: 6.0 additional_tests: rosebud: *test_rosebud - disabled: True - name: tokyotoshokan engine: tokyotoshokan @@ -1383,7 +1383,6 @@ engines: - name: yahoo engine: yahoo shortcut: yh - disabled: true - name: yahoo news engine: yahoo_news diff --git a/searx/webapp.py b/searx/webapp.py index 8b6d3164fff759f99a32e5d938c9a4fd23e1f12d..3e015529689e34e9491ceb9e11bdd6eb82af61e2 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -379,7 +379,7 @@ def image_proxify(url: str): if url.startswith("//"): url = "https:" + url - if not request.preferences.get_value("image_proxy"): + if not settings["server"].get("image_proxy"): return url if url.startswith("data:image/"):