Unverified Commit 4a8a1655 authored by Adam Tauber's avatar Adam Tauber Committed by GitHub

Merge pull request #1803 from return42/filtron

 Tooling Box to setup & maintain searx instances and services. 
parents 0dc5bdb6 0616684b
# -*- coding: utf-8; mode: sh -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
# shellcheck shell=bash disable=SC2034
#
# This environment is used by ./utils scripts like filtron.sh or searx.sh. The
# default values are *most flexible* and *best maintained*, you normally not
# need to change the defaults (except PUBLIC_URL).
#
# Before you change any value here you have to uninstall any previous
# installation. Further is it recommended to backup your changes simply by
# adding them to you local brand (git branch)::
#
# git add .config
# The public URL of the searx instance: PUBLIC_URL="https://mydomain.xy/searx"
# The default is taken from ./utils/brand.env.
PUBLIC_URL="${SEARX_URL}"
if [[ ${PUBLIC_URL} == "https://searx.me" ]]; then
# hint: Linux containers do not have DNS entries, lets use IPs
PUBLIC_URL="http://$(primary_ip)/searx"
fi
# searx.sh
# ---------
# SEARX_INTERNAL_URL="127.0.0.1:8888"
# Only change, if you maintain a searx brand in your searx fork.
# GIT_BRANCH="${GIT_BRANCH:-master}"
# filtron.sh
# ----------
# FILTRON_API="127.0.0.1:4005"
# FILTRON_LISTEN="127.0.0.1:4004"
# FILTRON_TARGET="127.0.0.1:8888"
# morty.sh
# --------
# morty listen address
# MORTY_LISTEN="127.0.0.1:3000"
# PUBLIC_URL_PATH_MORTY="/morty/"
# system services
# ---------------
# Common $HOME folder of the service accounts
# SERVICE_HOME_BASE="/usr/local"
# **experimental**: Set SERVICE_USER to run all services by one account, but be
# aware that removing discrete components might conflict!
# SERVICE_USER=searx
# to sync with .dockerignore
.coverage
coverage/
cache/
.installed.cfg
engines.cfg
env
......
# -*- coding: utf-8; mode: makefile-gmake -*-
.DEFAULT_GOAL=help
# START Makefile setup
export GIT_URL=https://github.com/asciimoo/searx
export GIT_BRANCH=master
export SEARX_URL=https://searx.me
export DOCS_URL=https://asciimoo.github.io/searx
# END Makefile setup
include utils/makefile.include
PYOBJECTS = searx
DOC = docs
PY_SETUP_EXTRAS ?= \[test\]
PYDIST=./dist/py
PYBUILD=./build/py
include utils/makefile.include
include utils/makefile.python
include utils/makefile.sphinx
all: clean install
PHONY += help
help:
PHONY += help-min help-all help
help: help-min
@echo ''
@echo 'to get more help: make help-all'
help-min:
@echo ' test - run developer tests'
@echo ' docs - build documentation'
@echo ' docs-live - autobuild HTML documentation while editing'
......@@ -33,9 +40,18 @@ help:
@echo ' docker - build Docker image'
@echo ' node.env - download & install npm dependencies locally'
@echo ''
@$(MAKE) -s -f utils/makefile.include make-help
@echo 'environment'
@echo ' SEARX_URL = $(SEARX_URL)'
@echo ' GIT_URL = $(GIT_URL)'
@echo ' DOCS_URL = $(DOCS_URL)'
@echo ''
@$(MAKE) -e -s make-help
help-all: help-min
@echo ''
@$(MAKE) -e -s python-help
@echo ''
@$(MAKE) -s -f utils/makefile.python python-help
@$(MAKE) -e -s docs-help
PHONY += install
install: buildenv pyenvinstall
......@@ -44,7 +60,7 @@ PHONY += uninstall
uninstall: pyenvuninstall
PHONY += clean
clean: pyclean node.clean test.clean
clean: pyclean docs-clean node.clean test.clean
$(call cmd,common_clean)
PHONY += run
......@@ -61,14 +77,24 @@ run: buildenv pyenvinstall
# docs
# ----
sphinx-doc-prebuilds:: buildenv pyenvinstall prebuild-includes
PHONY += docs
docs: buildenv pyenvinstall sphinx-doc
docs: sphinx-doc-prebuilds sphinx-doc
$(call cmd,sphinx,html,docs,docs)
PHONY += docs-live
docs-live: buildenv pyenvinstall sphinx-live
docs-live: sphinx-doc-prebuilds sphinx-live
$(call cmd,sphinx_autobuild,html,docs,docs)
PHONY += prebuild-includes
prebuild-includes:
$(Q)mkdir -p $(DOCS_BUILD)/includes
$(Q)./utils/searx.sh doc | cat > $(DOCS_BUILD)/includes/searx.rst
$(Q)./utils/filtron.sh doc | cat > $(DOCS_BUILD)/includes/filtron.rst
$(Q)./utils/morty.sh doc | cat > $(DOCS_BUILD)/includes/morty.rst
$(GH_PAGES)::
@echo "doc available at --> $(DOCS_URL)"
......@@ -94,12 +120,14 @@ useragents.update: pyenvinstall
buildenv:
$(Q)echo "build searx/brand.py"
$(Q)echo "GIT_URL = '$(GIT_URL)'" > searx/brand.py
$(Q)echo "GIT_BRANCH = '$(GIT_BRANCH)'" >> searx/brand.py
$(Q)echo "ISSUE_URL = 'https://github.com/asciimoo/searx/issues'" >> searx/brand.py
$(Q)echo "SEARX_URL = '$(SEARX_URL)'" >> searx/brand.py
$(Q)echo "DOCS_URL = '$(DOCS_URL)'" >> searx/brand.py
$(Q)echo "PUBLIC_INSTANCES = 'https://searx.space'" >> searx/brand.py
$(Q)echo "build utils/brand.env"
$(Q)echo "export GIT_URL='$(GIT_URL)'" > utils/brand.env
$(Q)echo "export GIT_BRANCH='$(GIT_BRANCH)'" >> utils/brand.env
$(Q)echo "export ISSUE_URL='https://github.com/asciimoo/searx/issues'" >> utils/brand.env
$(Q)echo "export SEARX_URL='$(SEARX_URL)'" >> utils/brand.env
$(Q)echo "export DOCS_URL='$(DOCS_URL)'" >> utils/brand.env
......@@ -182,8 +210,7 @@ gecko.driver:
# test
# ----
PHONY += test test.pylint test.pep8 test.unit test.coverage test.robot
PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
ifeq ($(PY),2)
......@@ -191,6 +218,7 @@ test.pylint:
@echo "LINT skip liniting py2"
else
# TODO: balance linting with pylint
test.pylint: pyenvinstall
$(call cmd,pylint,\
searx/preferences.py \
......@@ -202,6 +230,17 @@ endif
# E402 module level import not at top of file
# W503 line break before binary operator
# ubu1604: uses shellcheck v0.3.7 (from 04/2015), no longer supported!
test.sh:
shellcheck -x -s bash utils/brand.env
shellcheck -x utils/lib.sh
shellcheck -x utils/filtron.sh
shellcheck -x utils/searx.sh
shellcheck -x utils/morty.sh
shellcheck -x utils/lxc.sh
shellcheck -x utils/lxc-searx.env
shellcheck -x .config.sh
test.pep8: pyenvinstall
@echo "TEST pep8"
$(Q)$(PY_ENV_ACT); pep8 --exclude=searx/static --max-line-length=120 --ignore "E402,W503" searx tests
......
......@@ -33,7 +33,7 @@ p.sidebar-title, .sidebar p {
/* admonitions
*/
div.admonition, div.topic {
div.admonition, div.topic, div.toctree-wrapper {
background-color: #fafafa;
margin: 8px 0px;
padding: 1em;
......@@ -42,6 +42,16 @@ div.admonition, div.topic {
border-right: none;
border-bottom: none;
border-left: 5pt solid #ccc;
list-style-type: disclosure-closed;
}
div.toctree-wrapper p.caption {
font-weight: normal;
font-size: 24px;
margin: 0 0 10px 0;
padding: 0;
line-height: 1;
display: inline;
}
p.admonition-title:after {
......@@ -128,3 +138,32 @@ caption {
caption-side: top;
text-align: left;
}
/* bugs since sphinx 3.1
See sphinx-doc project, PR 7838 & 7484 with elementary patch to the basic CSS:
- https://github.com/sphinx-doc/sphinx/issues/7838#issuecomment-646009605
- https://github.com/sphinx-doc/sphinx/pull/7484#issuecomment-646058972
*/
li > p:first-child {
margin-top: 0;
}
li > p:last-child {
margin-bottom: 0;
}
div.admonition dl {
margin-bottom: 0;
}
div.sidebar {
clear: none;
}
div.admonition, div.topic, pre {
clear: none;
}
......@@ -4,11 +4,11 @@ digraph G {
edge [fontname="Sans"];
browser [label="Browser", shape=Mdiamond];
rp [label="Reverse Proxy", href="url to configure reverse proxy"];
filtron [label="Filtron", href="https://github.com/asciimoo/filtron"];
morty [label="Morty", href="https://github.com/asciimoo/morty"];
rp [label="Reverse Proxy", href="https://asciimoo.github.io/searx/utils/filtron.sh.html#public-reverse-proxy"];
filtron [label="Filtron", href="https://asciimoo.github.io/searx/utils/filtron.sh.html"];
morty [label="Morty", href="https://asciimoo.github.io/searx/utils/morty.sh.html"];
static [label="Static files", href="url to configure static files"];
uwsgi [label="uwsgi", href="url to configure uwsgi"]
uwsgi [label="uwsgi", href="https://asciimoo.github.io/searx/utils/searx.sh.html"]
searx1 [label="Searx #1"];
searx2 [label="Searx #2"];
searx3 [label="Searx #3"];
......
......@@ -4,17 +4,21 @@
Architecture
============
.. sidebar:: Needs work!
.. sidebar:: Further reading
This article needs some work / Searx is a collaborative effort. If you have
any contribution, feel welcome to send us your :pull:`PR <../pulls>`, see
:ref:`how to contribute`.
- Reverse Proxy: :ref:`Apache <apache searx site>` & :ref:`nginx <nginx searx
site>`
- Filtron: :ref:`searx filtron`
- Morty: :ref:`searx morty`
- uWSGI: :ref:`searx uwsgi`
- Searx: :ref:`installation basic`
Herein you will find some hints and suggestions about typical architectures of
searx infrastructures.
We start with a contribution from :pull:`@dalf <1776#issuecomment-567917320>`.
It shows a *reference* setup for public searx instances.
It shows a *reference* setup for public searx instances which can build up and
maintained by the scripts from our :ref:`toolboxing`.
.. _arch public:
......
......@@ -9,8 +9,27 @@ Buildhosts
If you have any contribution send us your :pull:`PR <../pulls>`, see
:ref:`how to contribute`.
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
To get best results from build, its recommend to install additional packages
on build hosts.
on build hosts (see :ref:`searx.sh`).::
sudo -H ./utils/searx.sh install buildhost
This will install packages needed by searx:
.. kernel-include:: $DOCS_BUILD/includes/searx.rst
:start-after: START distro-packages
:end-before: END distro-packages
and packages needed to build docuemtation and run tests:
.. kernel-include:: $DOCS_BUILD/includes/searx.rst
:start-after: START build-packages
:end-before: END build-packages
.. _docs build:
......@@ -35,8 +54,17 @@ processing additional packages are needed. The XeTeX_ needed not only for PDF
creation, its also needed for :ref:`math` when HTML output is build.
To be able to do :ref:`sphinx:math-support` without CDNs, the math are rendered
as images (``sphinx.ext.imgmath`` extension). If your docs build (``make
docs``) shows warnings like this::
as images (``sphinx.ext.imgmath`` extension).
Here is the extract from the :origin:`docs/conf.py` file, setting math renderer
to ``imgmath``:
.. literalinclude:: ../conf.py
:language: python
:start-after: # sphinx.ext.imgmath setup
:end-before: # sphinx.ext.imgmath setup END
If your docs build (``make docs``) shows warnings like this::
WARNING: dot(1) not found, for better output quality install \
graphviz from http://www.graphviz.org
......@@ -47,8 +75,6 @@ docs``) shows warnings like this::
you need to install additional packages on your build host, to get better HTML
output.
.. _system requirements:
.. tabs::
.. group-tab:: Ubuntu / debian
......@@ -92,12 +118,38 @@ For PDF output you also need:
$ sudo dnf install \
texlive-collection-fontsrecommended texlive-collection-latex \
dejavu-sans-fonts dejavu-serif-fonts dejavu-sans-mono-fonts
dejavu-sans-fonts dejavu-serif-fonts dejavu-sans-mono-fonts \
ImageMagick
.. _system requirements END:
.. _sh lint:
.. literalinclude:: ../conf.py
:language: python
:start-after: # sphinx.ext.imgmath setup
:end-before: # sphinx.ext.imgmath setup END
Lint shell scripts
==================
.. _ShellCheck: https://github.com/koalaman/shellcheck
To lint shell scripts, we use ShellCheck_ - A shell script static analysis tool.
.. SNIP sh lint requirements
.. tabs::
.. group-tab:: Ubuntu / debian
.. code-block:: sh
$ sudo apt install shellcheck
.. group-tab:: Arch Linux
.. code-block:: sh
$ sudo pacman -S shellcheck
.. group-tab:: Fedora / RHEL
.. code-block:: sh
$ sudo dnf install ShellCheck
.. SNAP sh lint requirements
.. _searx filtron:
==========================
How to protect an instance
==========================
Searx depens on external search services. To avoid the abuse of these services
.. sidebar:: further reading
- :ref:`filtron.sh`
- :ref:`nginx searx site`
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. _filtron: https://github.com/asciimoo/filtron
Searx depends on external search services. To avoid the abuse of these services
it is advised to limit the number of requests processed by searx.
An application firewall, ``filtron`` solves exactly this problem. Information
on how to install it can be found at the `project page of filtron
<https://github.com/asciimoo/filtron>`__.
An application firewall, filtron_ solves exactly this problem. Filtron is just
a middleware between your web server (nginx, apache, ...) and searx, we describe
such infratructures in chapter: :ref:`architecture`.
filtron & go
============
.. _Go: https://golang.org/
.. _filtron README: https://github.com/asciimoo/filtron/blob/master/README.md
Filtron needs Go_ installed. If Go_ is preinstalled, filtron_ is simply
installed by ``go get`` package management (see `filtron README`_). If you use
filtron as middleware, a more isolated setup is recommended. To simplify such
an installation and the maintenance of, use our script :ref:`filtron.sh`.
.. _Sample configuration of filtron:
Sample configuration of filtron
===============================
.. sidebar:: Tooling box
- :origin:`/etc/filtron/rules.json <utils/templates/etc/filtron/rules.json>`
An example configuration can be find below. This configuration limits the access
of:
......@@ -24,105 +57,104 @@ of:
.. code:: json
[{
"name":"search request",
"filters":[
"Param:q",
"Path=^(/|/search)$"
],
"interval":"<time-interval-in-sec (int)>",
"limit":"<max-request-number-in-interval (int)>",
"subrules":[
{
"name":"roboagent limit",
"interval":"<time-interval-in-sec (int)>",
"limit":"<max-request-number-in-interval (int)>",
"filters":[
"Header:User-Agent=(curl|cURL|Wget|python-requests|Scrapy|FeedFetcher|Go-http-client)"
],
"actions":[
{
"name":"block",
"params":{
"message":"Rate limit exceeded"
}
}
]
},
{
"name":"botlimit",
"limit":0,
"stop":true,
"filters":[
"Header:User-Agent=(Googlebot|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT)"
],
"actions":[
{
"name":"block",
"params":{
"message":"Rate limit exceeded"
}
}
]
},
{
"name":"IP limit",
"interval":"<time-interval-in-sec (int)>",
"limit":"<max-request-number-in-interval (int)>",
"stop":true,
"aggregations":[
"Header:X-Forwarded-For"
],
"actions":[
{
"name":"block",
"params":{
"message":"Rate limit exceeded"
}
}
]
},
{
"name":"rss/json limit",
"interval":"<time-interval-in-sec (int)>",
"limit":"<max-request-number-in-interval (int)>",
"stop":true,
"filters":[
"Param:format=(csv|json|rss)"
],
"actions":[
{
"name":"block",
"params":{
"message":"Rate limit exceeded"
}
}
]
},
{
"name":"useragent limit",
"interval":"<time-interval-in-sec (int)>",
"limit":"<max-request-number-in-interval (int)>",
"aggregations":[
"Header:User-Agent"
[
{
"name": "search request",
"filters": [
"Param:q",
"Path=^(/|/search)$"
],
"actions":[
{
"name":"block",
"params":{
"message":"Rate limit exceeded"
}
}
"interval": "<time-interval-in-sec (int)>"
"limit": "<max-request-number-in-interval (int)>",
"subrules": [
{
"name": "missing Accept-Language",
"filters": ["!Header:Accept-Language"],
"limit": "<max-request-number-in-interval (int)>",
"stop": true,
"actions": [
{"name":"log"},
{"name": "block",
"params": {"message": "Rate limit exceeded"}}
]
},
{
"name": "suspiciously Connection=close header",
"filters": ["Header:Connection=close"],
"limit": "<max-request-number-in-interval (int)>",
"stop": true,
"actions": [
{"name":"log"},
{"name": "block",
"params": {"message": "Rate limit exceeded"}}
]
},
{
"name": "IP limit",
"interval": "<time-interval-in-sec (int)>"
"limit": "<max-request-number-in-interval (int)>",
"stop": true,
"aggregations": [
"Header:X-Forwarded-For"
],
"actions": [
{ "name": "log"},
{ "name": "block",
"params": {
"message": "Rate limit exceeded"
}
}
]
},
{
"name": "rss/json limit",
"filters": [
"Param:format=(csv|json|rss)"
],
"interval": "<time-interval-in-sec (int)>"
"limit": "<max-request-number-in-interval (int)>",
"stop": true,
"actions": [
{ "name": "log"},
{ "name": "block",
"params": {
"message": "Rate limit exceeded"
}
}
]
},
{
"name": "useragent limit",
"interval": "<time-interval-in-sec (int)>"
"limit": "<max-request-number-in-interval (int)>",
"aggregations": [
"Header:User-Agent"
],
"actions": [
{ "name": "log"},
{ "name": "block",
"params": {
"message": "Rate limit exceeded"
}
}
]
}
]
}
]
}]
}
]
.. _filtron route request:
Route request through filtron
=============================
.. sidebar:: further reading
- :ref:`filtron.sh overview`