Commit 44085e31 authored by marc's avatar marc
Browse files

update engines_languages.json and languages.py

Also, fix fetch_languages.py so it can run on python3.
parent a524dbb8
This diff is collapsed.
......@@ -134,4 +134,4 @@ def _fetch_supported_languages(resp):
regions_json = loads(response_page)
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
return supported_languages
return list(supported_languages)
......@@ -118,7 +118,7 @@ def _fetch_supported_languages(resp):
dom = fromstring(resp.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]
code = option.xpath('./@data-search-language')[0]
if code.startswith('nb-'):
code = code.replace('nb', 'no', 1)
supported_languages.append(code)
......
......@@ -5,6 +5,11 @@
language_codes = (
(u"ar-SA", u"العربية", u"", u"Arabic"),
(u"bg-BG", u"Български", u"", u"Bulgarian"),
(u"ca", u"Català", u"", u"Catalan"),
(u"ca-AD", u"Català", u"Andorra", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"ca-FR", u"Català", u"França", u"Catalan"),
(u"cs-CZ", u"Čeština", u"", u"Czech"),
(u"da-DK", u"Dansk", u"", u"Danish"),
(u"de", u"Deutsch", u"", u"German"),
......@@ -15,9 +20,7 @@ language_codes = (
(u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"),
(u"en-CA", u"English", u"Canada", u"English"),
(u"en-CY", u"English", u"Cyprus", u"English"),
(u"en-GB", u"English", u"United Kingdom", u"English"),
(u"en-GD", u"English", u"Grenada", u"English"),
(u"en-ID", u"English", u"Indonesia", u"English"),
(u"en-IE", u"English", u"Ireland", u"English"),
(u"en-IN", u"English", u"India", u"English"),
......@@ -28,6 +31,7 @@ language_codes = (
(u"en-US", u"English", u"United States", u"English"),
(u"en-ZA", u"English", u"South Africa", u"English"),
(u"es", u"Español", u"", u"Spanish"),
(u"es-AD", u"Español", u"Andorra", u"Spanish"),
(u"es-AR", u"Español", u"Argentina", u"Spanish"),
(u"es-CL", u"Español", u"Chile", u"Spanish"),
(u"es-CO", u"Español", u"Colombia", u"Spanish"),
......@@ -38,38 +42,32 @@ language_codes = (
(u"et-EE", u"Eesti", u"", u"Estonian"),
(u"fi-FI", u"Suomi", u"", u"Finnish"),
(u"fr", u"Français", u"", u"French"),
(u"fr-AD", u"Français", u"Andorre", u"French"),
(u"fr-BE", u"Français", u"Belgique", u"French"),
(u"fr-CA", u"Français", u"Canada", u"French"),
(u"fr-CH", u"Français", u"Suisse", u"French"),
(u"fr-FR", u"Français", u"France", u"French"),
(u"he-IL", u"עברית", u"", u"Hebrew"),
(u"hr-HR", u"Hrvatski", u"", u"Croatian"),
(u"hu-HU", u"Magyar", u"", u"Hungarian"),
(u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"),
(u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
(u"it-IT", u"Italiano", u"Italia", u"Italian"),
(u"ja-JP", u"日本語", u"", u"Japanese"),
(u"ko-KR", u"한국어", u"", u"Korean"),
(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
(u"lv-LV", u"Latviešu", u"", u"Latvian"),
(u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"no-NO", u"Norsk", u"", u"Norwegian"),
(u"pl-PL", u"Polski", u"", u"Polish"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-AD", u"Português", u"Andorra", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"ro-RO", u"Română", u"", u"Romanian"),
(u"ru-RU", u"Русский", u"", u"Russian"),
(u"sk-SK", u"Slovenčina", u"", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sv-SE", u"Svenska", u"", u"Swedish"),
(u"th-TH", u"ไทย", u"", u"Thai"),
(u"tr-TR", u"Türkçe", u"", u"Turkish"),
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
(u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"),
(u"zh-HK", u"中文", u"香港", u"Chinese"),
......
......@@ -139,9 +139,9 @@ class TestSwisscowsEngine(SearxTestCase):
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
<li><a data-search-language="browser"></a></li>
<li><a data-search-language="de-CH"></a></li>
<li><a data-search-language="fr-CH"></a></li>
</ul>
</div>
</div>
......
......@@ -8,13 +8,13 @@
# are written in current directory to avoid overwriting in case something goes wrong.
from requests import get
from urllib import urlencode
from lxml.html import fromstring
from json import loads, dumps
from json import loads, dump
import io
from sys import path
path.append('../searx') # noqa
from searx import settings
from searx.url_utils import urlencode
from searx.engines import initialize_engines, engines
# Geonames API for country names.
......@@ -70,7 +70,7 @@ def get_country_name(locale):
json = loads(response.text)
content = json.get('geonames', None)
if content is None or len(content) != 1:
print "No country name found for " + locale[0] + "-" + locale[1]
print("No country name found for " + locale[0] + "-" + locale[1])
return ''
return content[0].get('countryName', '')
......@@ -84,11 +84,11 @@ def fetch_supported_languages():
try:
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
except Exception as e:
print e
print(e)
# write json file
with io.open(engines_languages_file, "w", encoding="utf-8") as f:
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
dump(engines_languages, f, ensure_ascii=False)
# Join all language lists.
......@@ -97,7 +97,7 @@ def join_language_lists():
global languages
# include wikipedia first for more accurate language names
languages = {code: lang for code, lang
in engines_languages['wikipedia'].iteritems()
in engines_languages['wikipedia'].items()
if valid_code(code)}
for engine_name in engines_languages:
......@@ -121,7 +121,7 @@ def join_language_lists():
# filter list to include only languages supported by most engines
min_supported_engines = int(0.70 * len(engines_languages))
languages = {code: lang for code, lang
in languages.iteritems()
in languages.items()
if len(lang.get('counter', [])) >= min_supported_engines or
len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines}
......@@ -165,7 +165,7 @@ def filter_single_country_languages():
# Write languages.py.
def write_languages_file():
new_file = open(languages_file, 'w')
new_file = open(languages_file, 'wb')
file_content = '# -*- coding: utf-8 -*-\n'\
+ '# list of language codes\n'\
+ '# this file is generated automatically by utils/update_search_languages.py\n'\
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment