__init__.py 8.89 KB
Newer Older
asciimoo's avatar
asciimoo committed
1

asciimoo's avatar
asciimoo committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''

Gabor Nagy's avatar
Gabor Nagy committed
19
import sys
20
21
import threading
from os.path import realpath, dirname
22
from io import open
23
from babel.localedata import locale_identifiers
24
from flask_babel import gettext
Adam Tauber's avatar
Adam Tauber committed
25
from operator import itemgetter
26
from json import loads
27
from requests import get
Gabor Nagy's avatar
Gabor Nagy committed
28
from searx import settings
Adam Tauber's avatar
Adam Tauber committed
29
from searx import logger
30
from searx.utils import load_module, match_language
Adam Tauber's avatar
Adam Tauber committed
31
32
33


# Child logger used for every message emitted by this module.
logger = logger.getChild('engines')

# Directory containing this file; engine modules are loaded from here.
engine_dir = dirname(realpath(__file__))

# Loaded engine modules, keyed by engine name (filled by load_engines()).
engines = {}

# Category name -> list of engine modules registered in that category.
categories = {'general': []}

# Per-engine language data read from the bundled JSON file, keyed by engine
# name. The file is opened in a ``with`` block so the handle is closed as soon
# as the content has been parsed.
with open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8') as languages_file:
    languages = loads(languages_file.read())

# Babel locale identifiers rewritten from 'xx_YY' to 'xx-YY'. Identifiers with
# more than two parts keep only their first and last part; identifiers without
# an underscore are kept unchanged.
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]

# Engine shortcut -> engine name (filled by load_engine()).
engine_shortcuts = {}
Adam Tauber's avatar
Adam Tauber committed
46
47
48
# Attribute defaults: load_engine() sets each of these on an engine module
# that does not already define an attribute of that name. The default timeout
# is taken from the ``outgoing.request_timeout`` setting.
engine_default_args = {
    'paging': False,
    'categories': ['general'],
    'language_support': True,
    'supported_languages': [],
    'safesearch': False,
    'timeout': settings['outgoing']['request_timeout'],
    'shortcut': '-',
    'disabled': False,
    'suspend_end_time': 0,
    'continuous_errors': 0,
    'time_range_support': False,
}
57

58

Adam Tauber's avatar
Adam Tauber committed
59
def load_engine(engine_data):
    """Load one engine module and configure it from its settings entry.

    ``engine_data`` is a mapping that must contain ``name`` and ``engine``.
    ``engine`` is the module file name without ``.py``; the module is loaded
    from ``engine_dir``. Every other key is copied onto the module as an
    attribute. ``categories`` is a comma separated string, or ``'none'`` for
    an engine without any category.

    Side effects: the engine is appended to ``categories`` and its shortcut is
    recorded in ``engine_shortcuts``.

    Returns the configured engine module, or ``None`` when the module cannot
    be loaded or has ``inactive`` set to ``True``.

    Exits the process with status 1 on a configuration error: an underscore in
    the engine name, a public module attribute left as ``None``, or a shortcut
    that is already registered.
    """
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except Exception:
        # A broken engine must not prevent the other engines from loading.
        # SystemExit and KeyboardInterrupt are deliberately not caught here.
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    # copy the settings onto the module
    for param_name, param_value in engine_data.items():
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if param_value == 'none':
                engine.categories = []
            else:
                # calling .strip() on each item works for any text type
                engine.categories = [category.strip() for category in param_value.split(',')]
            continue
        setattr(engine, param_name, param_value)

    # fill in defaults for everything the module and the settings left unset
    # NOTE: the default object itself is assigned, so mutable defaults are
    # shared between all engines that fall back to them.
    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        attr_value = getattr(engine, engine_attr)
        if engine_attr == 'inactive' and attr_value is True:
            return None
        if attr_value is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_name in languages:
        engine.supported_languages = languages[engine_name]

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        supported_languages = engine.supported_languages
        language_aliases = getattr(engine, 'language_aliases', {})

        for engine_lang in supported_languages:
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in supported_languages:
                language_aliases[iso_lang] = engine_lang

        engine.language_aliases = language_aliases

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'page_load_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
144

145

146
147
148
149
150
151
152
153
154
def to_percentage(stats, maxvalue):
    """Add a ``percentage`` key to every entry of ``stats``.

    Each entry is a dict with a numeric ``avg`` value. Its percentage is
    ``avg`` relative to ``maxvalue``, truncated to an int. When ``maxvalue``
    is falsy (for example 0) every percentage is 0.

    The entries are modified in place and the same ``stats`` list is returned.
    """
    # float() keeps this a true division when both operands are ints, in line
    # with the other divisions in this module.
    divisor = float(maxvalue) if maxvalue else 0
    for engine_stat in stats:
        if divisor:
            engine_stat['percentage'] = int(engine_stat['avg'] / divisor * 100)
        else:
            engine_stat['percentage'] = 0
    return stats


asciimoo's avatar
asciimoo committed
155
def get_engines_stats():
    """Build the per-engine statistics tables from the ``stats`` counters.

    Engines whose ``search_count`` is 0 are left out. For every other engine
    six values are computed:

    * average engine time and average page load time (0 when the matching
      counter is 0),
    * average number of results per search,
    * average score per search and average score per result (both 0.0 when
      the engine returned no results),
    * the raw error counter.

    Returns a list of ``(translated title, rows)`` tuples. Each row is a dict
    with the keys ``name``, ``avg`` and ``percentage``, where ``percentage``
    is ``avg`` relative to the largest ``avg`` of the same table. The two
    timing tables are sorted ascending by ``avg``, the others descending.
    """
    metric_names = ('engine_times', 'pageloads', 'results',
                    'scores', 'scores_per_result', 'errors')
    rows = dict((metric, []) for metric in metric_names)
    maxima = dict.fromkeys(metric_names, 0)

    for engine in engines.values():
        stats = engine.stats
        if stats['search_count'] == 0:
            continue

        search_count = float(stats['search_count'])
        results_num = stats['result_count'] / search_count

        if stats['page_load_count'] != 0:
            load_times = stats['page_load_time'] / float(stats['page_load_count'])
        else:
            load_times = 0

        if stats['engine_time_count'] != 0:
            this_engine_time = stats['engine_time'] / float(stats['engine_time_count'])
        else:
            this_engine_time = 0

        if results_num:
            score = stats['score_count'] / search_count
            score_per_result = score / results_num
        else:
            score = score_per_result = 0.0

        averages = {
            'engine_times': this_engine_time,
            'pageloads': load_times,
            'results': results_num,
            'scores': score,
            'scores_per_result': score_per_result,
            'errors': stats['errors'],
        }
        for metric in metric_names:
            value = averages[metric]
            maxima[metric] = max(value, maxima[metric])
            rows[metric].append({'avg': value, 'name': engine.name})

    # to_percentage() annotates the rows in place, so its return value is not
    # needed here.
    for metric in metric_names:
        to_percentage(rows[metric], maxima[metric])

    return [
        (
            gettext('Engine time (sec)'),
            sorted(rows['engine_times'], key=itemgetter('avg'))
        ),
        (
            gettext('Page loads (sec)'),
            sorted(rows['pageloads'], key=itemgetter('avg'))
        ),
        (
            gettext('Number of results'),
            sorted(rows['results'], key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Scores'),
            sorted(rows['scores'], key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Scores per result'),
            sorted(rows['scores_per_result'], key=itemgetter('avg'), reverse=True)
        ),
        (
            gettext('Errors'),
            sorted(rows['errors'], key=itemgetter('avg'), reverse=True)
        ),
    ]
Adam Tauber's avatar
Adam Tauber committed
237
238


239
240
241
def load_engines(engine_list):
    """(Re)build the engine registries from ``engine_list``.

    ``engine_list`` is an iterable of engine settings entries; each one is
    passed to ``load_engine()``. Entries for which ``load_engine()`` returns
    ``None`` are skipped.

    The registries are emptied in place before loading, so references held by
    other modules stay valid and calling this function again does not trip
    over shortcuts or category entries left behind by the previous call.

    Returns the ``engines`` dict (engine name -> engine module).
    """
    engines.clear()
    engine_shortcuts.clear()
    # back to the module's initial state: only an empty 'general' category
    categories.clear()
    categories['general'] = []

    for engine_data in engine_list:
        engine = load_engine(engine_data)
        if engine is not None:
            engines[engine.name] = engine
    return engines


def initialize_engines(engine_list):
    """Load all engines and run their optional ``init`` hooks in background.

    ``engine_list`` is handed to ``load_engines()``. For every loaded engine
    that has an ``init`` attribute, that callable is run in its own thread.
    This function does not wait for those threads to finish.
    """
    load_engines(engine_list)

    def engine_init(engine_name, init_fn):
        init_fn()
        logger.debug('%s engine initialized', engine_name)

    for engine_name, engine in engines.items():
        if hasattr(engine, 'init'):
            init_fn = getattr(engine, 'init')
            logger.debug('Starting background initialization of %s engine', engine_name)
            # The name and hook are passed as thread arguments. A closure over
            # the loop variables could see the values of a later iteration by
            # the time the thread actually runs.
            threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()