Commit 4d177039 authored by marc, committed by Marc Abonce Seguin
Browse files

remove 'all' option from search languages

parent 46fb0d86
...@@ -26,7 +26,7 @@ xpath_results = '//ul[@class="mw-search-results"]/li' ...@@ -26,7 +26,7 @@ xpath_results = '//ul[@class="mw-search-results"]/li'
xpath_link = './/div[@class="mw-search-result-heading"]/a' xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
def locale_to_lang_code(locale): def locale_to_lang_code(locale):
if locale.find('-') >= 0: if locale.find('-') >= 0:
locale = locale.split('-')[0] locale = locale.split('-')[0]
...@@ -36,7 +36,7 @@ def locale_to_lang_code(locale): ...@@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
# wikis for some languages were moved off from the main site, we need to make # wikis for some languages were moved off from the main site, we need to make
# requests to correct URLs to be able to get results in those languages # requests to correct URLs to be able to get results in those languages
lang_urls = { lang_urls = {
'all': { 'en': {
'base': 'https://wiki.archlinux.org', 'base': 'https://wiki.archlinux.org',
'search': '/index.php?title=Special:Search&offset={offset}&{query}' 'search': '/index.php?title=Special:Search&offset={offset}&{query}'
}, },
...@@ -67,7 +67,7 @@ lang_urls = { ...@@ -67,7 +67,7 @@ lang_urls = {
def get_lang_urls(language): def get_lang_urls(language):
if language in lang_urls: if language in lang_urls:
return lang_urls[language] return lang_urls[language]
return lang_urls['all'] return lang_urls['en']
# Language names to build search requests for # Language names to build search requests for
......
...@@ -32,10 +32,7 @@ search_string = 'search?{query}&first={offset}' ...@@ -32,10 +32,7 @@ search_string = 'search?{query}&first={offset}'
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all': lang = params['language'].split('-')[0].upper()
lang = params['language'].split('-')[0].upper()
else:
lang = 'EN'
query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8') query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8')
......
...@@ -71,10 +71,7 @@ def request(query, params): ...@@ -71,10 +71,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all': language = params['language']
language = 'en-US'
else:
language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range']) params['url'] = _get_url(query, language, offset, params['time_range'])
......
...@@ -32,10 +32,7 @@ supported_languages_url = 'https://api.dailymotion.com/languages' ...@@ -32,10 +32,7 @@ supported_languages_url = 'https://api.dailymotion.com/languages'
# do search-request # do search-request
def request(query, params): def request(query, params):
if params['language'] == 'all': locale = params['language']
locale = 'en-US'
else:
locale = params['language']
params['url'] = search_url.format( params['url'] = search_url.format(
query=urlencode({'search': query, 'localization': locale}), query=urlencode({'search': query, 'localization': locale}),
......
...@@ -44,9 +44,7 @@ content_xpath = './/a[@class="result__snippet"]' ...@@ -44,9 +44,7 @@ content_xpath = './/a[@class="result__snippet"]'
# match query's language to a region code that duckduckgo will accept # match query's language to a region code that duckduckgo will accept
def get_region_code(lang, lang_list=None): def get_region_code(lang, lang_list=None):
# custom fixes for languages # custom fixes for languages
if lang == 'all': if lang[:2] == 'ja':
region_code = None
elif lang[:2] == 'ja':
region_code = 'jp-jp' region_code = 'jp-jp'
elif lang[:2] == 'sl': elif lang[:2] == 'sl':
region_code = 'sl-sl' region_code = 'sl-sl'
...@@ -82,12 +80,8 @@ def request(query, params): ...@@ -82,12 +80,8 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30 offset = (params['pageno'] - 1) * 30
region_code = get_region_code(params['language']) region_code = get_region_code(params['language'])
if region_code: params['url'] = url.format(
params['url'] = url.format( query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
else:
params['url'] = url.format(
query=urlencode({'q': query}), offset=offset, dc_param=offset)
if params['time_range'] in time_range_dict: if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
......
...@@ -53,12 +53,8 @@ def request(query, params): ...@@ -53,12 +53,8 @@ def request(query, params):
safesearch = params['safesearch'] - 1 safesearch = params['safesearch'] - 1
region_code = get_region_code(params['language'], lang_list=supported_languages) region_code = get_region_code(params['language'], lang_list=supported_languages)
if region_code: params['url'] = images_url.format(
params['url'] = images_url.format( query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
else:
params['url'] = images_url.format(
query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
return params return params
......
...@@ -40,10 +40,7 @@ def request(query, params): ...@@ -40,10 +40,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * number_of_results + 1 offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web') categorie = search_category.get(params['category'], 'web')
if params['language'] == 'all': language = params['language'].split('-')[0]
language = 'en'
else:
language = params['language'].split('_')[0]
# if language is not supported, put it in english # if language is not supported, put it in english
if language != 'en' and\ if language != 'en' and\
......
...@@ -49,12 +49,9 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1' ...@@ -49,12 +49,9 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * number_of_results offset = (params['pageno'] - 1) * number_of_results
if params['language'] == 'all': language = params['language'].replace('-', '_').lower()
language = 'xx' if language.split('-')[0] != 'zh':
else: language = language.split('-')[0]
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['safesearch'] >= 1: if params['safesearch'] >= 1:
safesearch = 1 safesearch = 1
......
...@@ -165,7 +165,8 @@ def extract_text_from_dom(result, xpath): ...@@ -165,7 +165,8 @@ def extract_text_from_dom(result, xpath):
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 offset = (params['pageno'] - 1) * 10
if params['language'] == 'all': # temporary fix until a way of supporting en-US is found
if params['language'] == 'en-US':
params['language'] = 'en-GB' params['language'] = 'en-GB'
if params['language'][:2] == 'jv': if params['language'][:2] == 'jv':
......
...@@ -50,9 +50,8 @@ def request(query, params): ...@@ -50,9 +50,8 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}),
search_options=urlencode(search_options)) search_options=urlencode(search_options))
if params['language'] != 'all': language_array = params['language'].lower().split('-')
language_array = params['language'].lower().split('-') params['url'] += '&lr=lang_' + language_array[0]
params['url'] += '&lr=lang_' + language_array[0]
return params return params
......
...@@ -45,10 +45,7 @@ def request(query, params): ...@@ -45,10 +45,7 @@ def request(query, params):
format_strings = list(Formatter().parse(base_url)) format_strings = list(Formatter().parse(base_url))
if params['language'] == 'all': language = params['language'].split('-')[0]
language = 'en'
else:
language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings): if any(x[1] == 'language' for x in format_strings):
......
...@@ -35,10 +35,9 @@ def request(query, params): ...@@ -35,10 +35,9 @@ def request(query, params):
search_string.format(query=urlencode({'q': query}), search_string.format(query=urlencode({'q': query}),
limit=number_of_results) limit=number_of_results)
if params['language'] != 'all': language = params['language'].split('-')[0]
language = params['language'].split('_')[0] if language in supported_languages:
if language in supported_languages: params['url'] = params['url'] + "&lang=" + language
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent # using searx User-Agent
params['headers']['User-Agent'] = searx_useragent() params['headers']['User-Agent'] = searx_useragent()
......
...@@ -44,18 +44,17 @@ def request(query, params): ...@@ -44,18 +44,17 @@ def request(query, params):
query=urlencode({'q': query}), query=urlencode({'q': query}),
offset=offset) offset=offset)
# add language tag if specified # add language tag
if params['language'] != 'all': if params['language'] == 'no' or params['language'].startswith('no-'):
if params['language'] == 'no' or params['language'].startswith('no-'): params['language'] = params['language'].replace('no', 'nb', 1)
params['language'] = params['language'].replace('no', 'nb', 1) if params['language'].find('-') < 0:
if params['language'].find('-') < 0: # tries to get a country code from language
# tries to get a country code from language for lang in supported_languages:
for lang in supported_languages: lc = lang.split('-')
lc = lang.split('-') if params['language'] == lc[0]:
if params['language'] == lc[0]: params['language'] = lang
params['language'] = lang break
break params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
return params return params
......
...@@ -45,9 +45,8 @@ def request(query, params): ...@@ -45,9 +45,8 @@ def request(query, params):
params['data'] = {'query': query, params['data'] = {'query': query,
'startat': offset} 'startat': offset}
# set language if specified # set language
if params['language'] != 'all': params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params return params
......
...@@ -48,7 +48,7 @@ def response(resp): ...@@ -48,7 +48,7 @@ def response(resp):
search_lang = 'Farsi' search_lang = 'Farsi'
elif resp.search_params['language'] == 'pt-BR': elif resp.search_params['language'] == 'pt-BR':
search_lang = 'Brazilian' search_lang = 'Brazilian'
elif resp.search_params['language'] != 'all': else:
search_lang = [lc[3] search_lang = [lc[3]
for lc in language_codes for lc in language_codes
if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]] if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
......
...@@ -35,10 +35,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=') ...@@ -35,10 +35,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
# do search-request # do search-request
def request(query, params): def request(query, params):
if params['language'] == 'all': if params['language'].split('-')[0] == 'no':
ui_language = 'browser'
region = 'browser'
elif params['language'].split('-')[0] == 'no':
region = 'nb-NO' region = 'nb-NO'
else: else:
region = params['language'] region = params['language']
......
...@@ -37,12 +37,7 @@ timestamp_xpath = './/span[contains(@class,"_timestamp")]' ...@@ -37,12 +37,7 @@ timestamp_xpath = './/span[contains(@class,"_timestamp")]'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url + urlencode({'q': query}) params['url'] = search_url + urlencode({'q': query})
params['cookies']['lang'] = params['language'].split('-')[0]
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'
return params return params
......
...@@ -57,8 +57,6 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' ...@@ -57,8 +57,6 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
def request(query, params): def request(query, params):
language = params['language'].split('-')[0] language = params['language'].split('-')[0]
if language == 'all':
language = 'en'
params['url'] = url_search.format( params['url'] = url_search.format(
query=urlencode({'label': query, 'language': language})) query=urlencode({'label': query, 'language': language}))
...@@ -71,8 +69,6 @@ def response(resp): ...@@ -71,8 +69,6 @@ def response(resp):
wikidata_ids = html.xpath(wikidata_ids_xpath) wikidata_ids = html.xpath(wikidata_ids_xpath)
language = resp.search_params['language'].split('-')[0] language = resp.search_params['language'].split('-')[0]
if language == 'all':
language = 'en'
# TODO: make requests asynchronous to avoid timeout when result_count > 1 # TODO: make requests asynchronous to avoid timeout when result_count > 1
for wikidata_id in wikidata_ids[:result_count]: for wikidata_id in wikidata_ids[:result_count]:
......
...@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' ...@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url # set language in base_url
def url_lang(lang): def url_lang(lang):
lang = lang.split('-')[0] lang = lang.split('-')[0]
if lang == 'all' or lang not in supported_languages: if lang not in supported_languages:
language = 'en' language = 'en'
else: else:
language = lang language = lang
......
...@@ -51,9 +51,7 @@ def request(query, params): ...@@ -51,9 +51,7 @@ def request(query, params):
limit=number_of_results, limit=number_of_results,
search_type=search_type) search_type=search_type)
# add language tag if specified params['url'] += '&lr=lang_' + params['language'].split('-')[0]
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params return params
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment