Commit 43f9265e authored by Nicolas Gelot

Merge remote-tracking branch 'asciimoo/master'

parents c27b161f 5fa3c112
......@@ -98,3 +98,7 @@ if 'IMAGE_PROXY' in environ:
settings['server']['image_proxy'] = environ['IMAGE_PROXY']
if 'SEARX_REDIS_HOST' in environ:
settings['redis']['host'] = environ['SEARX_REDIS_HOST']
if 'HTTP_PROXY_URL' in environ:
settings['proxies']['http'] = environ['HTTP_PROXY_URL']
if 'HTTPS_PROXY_URL' in environ:
settings['proxies']['https'] = environ['HTTPS_PROXY_URL']
This diff is collapsed.
......@@ -113,8 +113,7 @@ def load_engine(engine_data):
iso_lang not in getattr(engine, 'supported_languages'):
language_aliases[iso_lang] = engine_lang
if language_aliases:
setattr(engine, 'language_aliases', language_aliases)
setattr(engine, 'language_aliases', language_aliases)
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
......
......@@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
# wikis for some languages were moved off from the main site, we need to make
# requests to correct URLs to be able to get results in those languages
lang_urls = {
'en': {
'all': {
'base': 'https://wiki.archlinux.org',
'search': '/index.php?title=Special:Search&offset={offset}&{query}'
},
......@@ -67,7 +67,7 @@ lang_urls = {
def get_lang_urls(language):
if language in lang_urls:
return lang_urls[language]
return lang_urls['en']
return lang_urls['all']
# Language names to build search requests for
......
"""
Asksteem (general)
@website https://asksteem.com/
@provide-api yes
@using-api yes
@results JSON (https://github.com/Hoxly/asksteem-docs/wiki)
@stable yes
@parse url, title, content
"""
from json import loads
from searx.url_utils import urlencode
# engine dependent config: module-level settings of the Asksteem engine
categories = ['general']
paging = True  # request() sends params['pageno'] as the `pg` query argument
language_support = False
disabled = True
# search-url: request() fills `{params}` with the url-encoded query arguments
search_url = 'https://api.asksteem.com/search?{params}'
# link template for one hit: response() fills `{author}` from the hit's
# `author` field and `{title}` from its `permlink` field
result_url = 'https://steemit.com/@{author}/{title}'
# do search-request
def request(query, params):
    """Store the Asksteem search URL for *query* in ``params['url']``.

    The query text is sent as ``q`` and ``params['pageno']`` as ``pg``.
    Returns the same ``params`` dict that was passed in.
    """
    query_args = {'q': query, 'pg': params['pageno']}
    params['url'] = search_url.format(params=urlencode(query_args))
    return params
# get response from search-request
def response(resp):
    """Convert an Asksteem JSON reply into a list of result dicts.

    ``resp.text`` is parsed as JSON. Every entry of its ``results`` list
    (a missing key means no results) becomes a dict with ``url``, ``title``
    and ``content``.
    """
    payload = loads(resp.text)
    return [{'url': result_url.format(author=hit['author'], title=hit['permlink']),
             'title': hit['title'],
             'content': hit['summary']}
            for hit in payload.get('results', [])]
......@@ -34,7 +34,10 @@ search_string = 'search?{query}&first={offset}'
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
lang = match_language(params['language'], supported_languages, language_aliases)
if params['language'] == 'all':
lang = 'EN'
else:
lang = match_language(params['language'], supported_languages, language_aliases)
query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
......
......@@ -55,7 +55,7 @@ def request(query, params):
query=urlencode({'q': query}),
offset=offset)
language = match_language(params['language'], supported_languages).lower()
language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['SRCHHPGUSR'] = \
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
......
......@@ -71,7 +71,10 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
language = match_language(params['language'], supported_languages, language_aliases)
if params['language'] == 'all':
language = 'en-US'
else:
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] = _get_url(query, language, offset, params['time_range'])
......
......@@ -33,7 +33,10 @@ supported_languages_url = 'https://api.dailymotion.com/languages'
# do search-request
def request(query, params):
locale = match_language(params['language'], supported_languages)
if params['language'] == 'all':
locale = 'en-US'
else:
locale = match_language(params['language'], supported_languages)
params['url'] = search_url.format(
query=urlencode({'search': query, 'localization': locale}),
......
......@@ -54,6 +54,9 @@ content_xpath = './/a[@class="result__snippet"]'
# match query's language to a region code that duckduckgo will accept
def get_region_code(lang, lang_list=[]):
if lang == 'all':
return None
lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
lang_parts = lang_code.split('-')
......@@ -61,7 +64,6 @@ def get_region_code(lang, lang_list=[]):
return lang_parts[1].lower() + '-' + lang_parts[0].lower()
# do search-request
def request(query, params):
if params['time_range'] and params['time_range'] not in time_range_dict:
return params
......@@ -69,8 +71,12 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
region_code = get_region_code(params['language'], supported_languages)
params['url'] = url.format(
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
if region_code:
params['url'] = url.format(
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
else:
params['url'] = url.format(
query=urlencode({'q': query}), offset=offset, dc_param=offset)
if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
......
......@@ -56,8 +56,12 @@ def request(query, params):
safesearch = params['safesearch'] - 1
region_code = get_region_code(params['language'], lang_list=supported_languages)
params['url'] = images_url.format(
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
if region_code:
params['url'] = images_url.format(
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
else:
params['url'] = images_url.format(
query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
return params
......
......@@ -40,7 +40,10 @@ def request(query, params):
offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
language = params['language'].split('-')[0]
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('-')[0]
# if language is not supported, put it in english
if language != 'en' and\
......
......@@ -50,9 +50,12 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['language'] == 'all':
language = 'xx'
else:
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
......
......@@ -165,7 +165,11 @@ def extract_text_from_dom(result, xpath):
def request(query, params):
offset = (params['pageno'] - 1) * 10
language = match_language(params['language'], supported_languages)
if params['language'] == 'all' or params['language'] == 'en-US':
language = 'en-GB'
else:
language = match_language(params['language'], supported_languages, language_aliases)
language_array = language.split('-')
if params['language'].find('-') > 0:
country = params['language'].split('-')[1]
......@@ -380,10 +384,10 @@ def attributes_to_html(attributes):
def _fetch_supported_languages(resp):
supported_languages = {}
dom = html.fromstring(resp.text)
options = dom.xpath('//table//td/font/label/span')
options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
for option in options:
code = option.xpath('./@id')[0][1:]
name = option.text.title()
code = option.xpath('./@value')[0].split('_')[-1]
name = option.xpath('./@data-name')[0].title()
supported_languages[code] = {"name": name}
return supported_languages
......@@ -51,9 +51,10 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
search_options=urlencode(search_options))
language = match_language(params['language'], supported_languages).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language
if params['language'] != 'all':
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language
return params
......
......@@ -45,7 +45,10 @@ def request(query, params):
format_strings = list(Formatter().parse(base_url))
language = params['language'].split('-')[0]
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):
......
......@@ -35,9 +35,10 @@ def request(query, params):
search_string.format(query=urlencode({'q': query}),
limit=number_of_results)
language = params['language'].split('-')[0]
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
if params['language'] != 'all':
language = params['language'].split('_')[0]
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
......
......@@ -46,8 +46,9 @@ def request(query, params):
offset=offset)
# add language tag
language = match_language(params['language'], supported_languages)
params['url'] += '&locale=' + language.replace('-', '_').lower()
if params['language'] != 'all':
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()
return params
......
......@@ -46,8 +46,9 @@ def request(query, params):
params['data'] = {'query': query,
'startat': offset}
# set language
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
# set language if specified
if params['language'] != 'all':
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params
......
......@@ -48,7 +48,7 @@ def response(resp):
search_lang = 'Farsi'
elif resp.search_params['language'] == 'pt-BR':
search_lang = 'Brazilian'
else:
elif resp.search_params['language'] != 'all':
search_lang = [lc[3]
for lc in language_codes
if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
......
......@@ -36,8 +36,12 @@ regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
# do search-request
def request(query, params):
region = match_language(params['language'], supported_languages)
ui_language = region.split('-')[0]
if params['language'] == 'all':
ui_language = 'browser'
region = 'browser'
else:
region = match_language(params['language'], supported_languages, language_aliases)
ui_language = region.split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
......
......@@ -37,7 +37,12 @@ timestamp_xpath = './/span[contains(@class,"_timestamp")]'
# do search-request
def request(query, params):
params['url'] = search_url + urlencode({'q': query})
params['cookies']['lang'] = params['language'].split('-')[0]
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'
return params
......
"""
Unsplash
@website https://unsplash.com
@provide-api yes (https://unsplash.com/developers)
@using-api no
@results JSON (using search portal's infiniscroll API)
@stable no (JSON format could change any time)
@parse url, title, img_src, thumbnail_src
"""
from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
from json import loads
# site root; search_url below is built on top of it
url = 'https://unsplash.com/'
# search endpoint prefix; request() appends the url-encoded query arguments
search_url = url + 'napi/search/photos?'
categories = ['images']
# number of photos asked for per page; request() sends it as `per_page`
page_size = 20
paging = True  # request() sends params['pageno'] as the `page` argument
def clean_url(url):
    """Return *url* without its ``ixid`` and ``s`` query arguments.

    The query string is rebuilt from the remaining arguments in their
    original order; every other URL component is passed through unchanged.
    """
    parts = urlparse(url)
    kept_args = []
    for key, value in parse_qsl(parts.query):
        if key in ('ixid', 's'):
            continue
        kept_args.append((key, value))
    # swap only the query component, then reassemble the URL
    return urlunparse(parts._replace(query=urlencode(kept_args)))
def request(query, params):
    """Store the Unsplash photo-search URL for *query* in ``params['url']``.

    Sends the query text as ``query``, ``params['pageno']`` as ``page`` and
    the module-level ``page_size`` as ``per_page``. Returns ``params``.
    """
    query_args = {'query': query, 'page': params['pageno'], 'per_page': page_size}
    params['url'] = search_url + urlencode(query_args)
    return params
def response(resp):
    """Convert an Unsplash search reply into a list of image results.

    ``resp.text`` is parsed as JSON. Every entry of its ``results`` list
    becomes a dict for the ``images.html`` template, with the page, thumbnail
    and raw image URLs passed through ``clean_url``. A reply without a
    ``results`` key yields an empty list.
    """
    json_data = loads(resp.text)
    if 'results' not in json_data:
        return []

    results = []
    for photo in json_data['results']:
        results.append({'template': 'images.html',
                        'url': clean_url(photo['links']['html']),
                        'thumbnail_src': clean_url(photo['urls']['thumb']),
                        'img_src': clean_url(photo['urls']['raw']),
                        # Unsplash may return null (or omit) `description`;
                        # hand on an empty string rather than None as title
                        'title': photo.get('description') or '',
                        'content': ''})
    return results
......@@ -68,7 +68,10 @@ def response(resp):
html = fromstring(resp.text)
search_results = html.xpath(wikidata_ids_xpath)
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
if resp.search_params['language'].split('-')[0] == 'all':
language = 'en'
else:
language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
# TODO: make requests asynchronous to avoid timeout when result_count > 1
for search_result in search_results[:result_count]:
......
......@@ -31,7 +31,10 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
return match_language(lang, supported_languages).split('-')[0]
lang_pre = lang.split('-')[0]
if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
return 'en'
return match_language(lang, supported_languages, language_aliases).split('-')[0]
# do search-request
......
......@@ -65,7 +65,7 @@ def replace_pua_chars(text):
def response(resp):
results = []
search_results = etree.XML(resp.text)
search_results = etree.XML(resp.content)
# return empty array if there are no results
if search_results.xpath(failure_xpath):
......
......@@ -51,7 +51,9 @@ def request(query, params):
limit=number_of_results,
search_type=search_type)
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params
......
......@@ -73,16 +73,25 @@ def _get_url(query, offset, language, time_range):
lang=language)
def _get_language(params):
if params['language'] == 'all':
return 'en'
language = match_language(params['language'], supported_languages, language_aliases)
if language not in language_aliases.values():
language = language.split('-')[0]
language = language.replace('-', '_').lower()
return language
# do search-request
def request(query, params):
if params['time_range'] and params['time_range'] not in time_range_dict:
return params
offset = (params['pageno'] - 1) * 10 + 1
language = match_language(params['language'], supported_languages, language_aliases)
if language not in language_aliases.values():
language = language.split('-')[0]
language = language.replace('-', '_').lower()
language = _get_language(params)
params['url'] = _get_url(query, offset, language, params['time_range'])
......
......@@ -41,7 +41,10 @@ suggestion_xpath = '//div[contains(@class,"VerALSOTRY")]//a'
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
if params['language'] == 'all':
language = 'en'
else:
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
params['url'] = search_url.format(offset=offset,
query=urlencode({'p': query}),
......
......@@ -34,7 +34,9 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
api_key=api_key)
params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params
......
......@@ -20,5 +20,6 @@ description = gettext('Results are opened in the same window by default. '
'This plugin overwrites the default behaviour to open links on new tabs/windows. '
'(JavaScript required)')
default_on = False
preference_section = 'ui'
js_dependencies = ('plugins/js/open_results_on_new_tab.js',)
......@@ -5,6 +5,7 @@ description = gettext('Navigate search results with Vim-like hotkeys '
'(JavaScript required). '
'Press "h" key on main or result page to get help.')
default_on = False
preference_section = 'ui'
js_dependencies = ('plugins/js/vim_hotkeys.js',)
css_dependencies = ('plugins/css/vim_hotkeys.css',)
......@@ -8,6 +8,7 @@ from searx.url_utils import parse_qs, urlencode
COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years
LANGUAGE_CODES = [l[0] for l in languages]
LANGUAGE_CODES.append('all')
DISABLED = 0
ENABLED = 1
DOI_RESOLVERS = list(settings['doi_resolvers'])
......
......@@ -216,10 +216,6 @@ def get_search_query_from_webapp(preferences, form):
else:
query_lang = preferences.get_value('language')
# provides backwards compatibility for requests using old language default
if query_lang == 'all':
query_lang = settings['search']['language']
# check language
if not VALID_LANGUAGE_CODE.match(query_lang):
raise SearxParameterException('language', query_lang)
......
......@@ -76,10 +76,6 @@ engines:
categories : science
timeout : 4.0
- name : asksteem
engine : asksteem
shortcut : as
- name : base
engine : base
shortcut : bs
......@@ -624,6 +620,11 @@ engines:
# content_xpath : //*[@class="meaning"]
# shortcut : ud
- name : unsplash
engine : unsplash
disabled: True
shortcut : us
- name : yahoo
engine : yahoo
shortcut : yh
......
......@@ -13,6 +13,7 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
......
......@@ -14,6 +14,7 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
......
......@@ -3,9 +3,10 @@
{% else %}
<select class="time_range custom-select form-control" id='language' name='language'>
{% endif %}
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
</option>
{% endfor %}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
</option>
{% endfor %}
</select>
......@@ -9,6 +9,7 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
......
......@@ -68,7 +68,7 @@
<option value="0" {% if safesearch == '0' %}selected="selected"{% endif %}>{{ _('None') }}</option>
</select>
</p>
<div class="description">{{ _('Filter content') }}</p>
<p class="description">{{ _('Filter content') }}</p>
</fieldset>
{{ plugin_preferences('general') }}
<fieldset>
......@@ -122,7 +122,7 @@
{% endif %}
{% endfor %}
</table>
<div>
</div>
{{ tab_footer() }}
{% endfor %}
{{ tabs_close() }}
......@@ -171,7 +171,7 @@
{{ _('With that list, you can assess searx transparency.') }}<br />
</p>
{% if cookies %}
{% if cookies %}
<table class="cookies">
<tr>
<th>{{ _('Cookie name') }}</th>
......@@ -186,14 +186,14 @@
</table>
{% else %}
{% include 'oscar/messages/no_cookies.html' %}
{% endif %}
{% endif %}
<h4>{{ _('Search URL of the currently saved preferences') }} :</h4>
<div class="selectable_url">
<pre>{{ url_for('index', _external=True) }}?preferences={{ preferences_url_params|e }}{% raw %}&amp;q=%s{% endraw %}</pre>
</div>
<p class="small_font">{{ _('Note: specifying custom settings in the search URL can reduce privacy by leaking data to the clicked result sites.') }}</p>
{{ tab_footer() }}
{{ tab_header('maintab', 'privacy', _('Privacy')) }}
......@@ -218,7 +218,7 @@
<div class="description">{{ _('Proxying image results through searx') }}</div>
</fieldset>
{{ plugin_preferences('privacy') }}
{{ tab_footer() }}
{{ tabs_close() }}
......@@ -226,7 +226,7 @@
<p class="small_font">{{ _('These settings are stored in your cookies, this allows us not to store this data about you.') }}
<br />
{{ _("These cookies serve your sole convenience, we don't use these cookies to track you.") }}
</p>
</p>
<input type="submit" value="{{ _('save') }}" />
<div class="{% if rtl %}left{% else %}right{% endif %} preferences_back"><a href="{{ url_for('clear_cookies') }}">{{ _('Reset defaults') }}</a></div>
......
......@@ -292,6 +292,9 @@ def image_proxify(url):
if not request.preferences.get_value('image_proxy'):
return url
if url.startswith('data:image/jpeg;base64,'):
return url
if settings.get('result_proxy'):
return proxify(url)
......@@ -586,8 +589,8 @@ def autocompleter():
if len(raw_results) <= 3 and completer:
# get language from cookie
language = request.preferences.get_value('language')
if not language:
language = settings['search']['language']
if not language or language == 'all':
language = 'en'
else:
language = language.split('-')[0]
# run autocompletion
......@@ -640,10 +643,7 @@ def preferences():
'warn_time': False}
if e.timeout > settings['outgoing']['request_timeout']:
stats[e.name]['warn_timeout'] = True
if match_language(request.preferences.get_value('language'),
getattr(e, 'supported_languages', []),
getattr(e, 'language_aliases', {}), None):
stats[e.name]['supports_selected_language'] = True
stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)
# get first element [0], the engine time,
# and then the second element [1] : the time (the first one is the label)
......@@ -674,6 +674,14 @@ def preferences():
preferences=True)