Unverified Commit 329172f6 authored by d-tux's avatar d-tux Committed by GitHub
Browse files

Merge branch 'master' into engines/unsplash

parents 4a127b19 2438b3c7
......@@ -83,3 +83,18 @@ generally made searx better:
- Joseph Nuthalapati @josephkiranbabu
- @maiki
- Richard Didier @zeph33
- Michael Vieria @Themimitoof
- Richard Nespithal @rndevfx
- Stanislas @angristan
- @rinpatch
- g. s. @usernameisntallowed
- Léo Bourrel @bourrel
- @cy8aer
- @Popolon
- Alice Ferrazzi @aliceinwire
- @LiquidLemon
- @dadosch
- @Venca24
- @ZEROF
- Ivan Skytte Jørgensen @isj-privacore
- @miicha
0.15.0 2019.01.06
=================
- New engines
- Acgsou (files, images, videos, music)
- Duden.de (general)
- Seznam (general)
- Mojeek (general)
- New languages
- Catalan
- Welsh
- Basque
- Persian (Iran)
- Galician
- Dutch (Belgium)
- Telugu
- Vietnamese
- New random answerers
- sha256
- uuidv4
- New DOI resolvers
- sci-hub.tw
- Fix Vim mode on Firefox
- Fix custom select in Oscar theme
- Engine fixes (duckduckgo, google news, currency convert, gigablast, google scholar, wikidata image, etymonline, google videos, startpage, bing image)
- Minor simple theme fixes
- New Youtube icon in Oscar theme
- Get DOI rewriters from settings.yml
- Hide page buttons when infinite scrolling is enabled
- Update user agent versions
- Make Oscar style configurable
- Make suspend times of errored engines configurable
0.14.0 2018.02.19
=================
......
This diff is collapsed.
......@@ -113,8 +113,7 @@ def load_engine(engine_data):
iso_lang not in getattr(engine, 'supported_languages'):
language_aliases[iso_lang] = engine_lang
if language_aliases:
setattr(engine, 'language_aliases', language_aliases)
setattr(engine, 'language_aliases', language_aliases)
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
......
......@@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
# wikis for some languages were moved off from the main site, we need to make
# requests to correct URLs to be able to get results in those languages
lang_urls = {
'en': {
'all': {
'base': 'https://wiki.archlinux.org',
'search': '/index.php?title=Special:Search&offset={offset}&{query}'
},
......@@ -67,7 +67,7 @@ lang_urls = {
def get_lang_urls(language):
if language in lang_urls:
return lang_urls[language]
return lang_urls['en']
return lang_urls['all']
# Language names to build search requests for
......
......@@ -34,7 +34,10 @@ search_string = 'search?{query}&first={offset}'
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
lang = match_language(params['language'], supported_languages, language_aliases)
if params['language'] == 'all':
lang = 'EN'
else:
lang = match_language(params['language'], supported_languages, language_aliases)
query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
......
......@@ -55,7 +55,7 @@ def request(query, params):
query=urlencode({'q': query}),
offset=offset)
language = match_language(params['language'], supported_languages).lower()
language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['SRCHHPGUSR'] = \
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
......@@ -88,9 +88,7 @@ def response(resp):
url = json_data.get('purl')
img_src = json_data.get('murl')
thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad')))
thumbnail = thumb_json_data.get('turl')
thumbnail = json_data.get('turl')
# append result
results.append({'template': 'images.html',
......
......@@ -71,7 +71,10 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
language = match_language(params['language'], supported_languages, language_aliases)
if params['language'] == 'all':
language = 'en-US'
else:
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] = _get_url(query, language, offset, params['time_range'])
......
......@@ -48,7 +48,7 @@ def request(query, params):
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
# language cookie
language = match_language(params['language'], supported_languages).lower()
language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
# query and paging
......
......@@ -33,7 +33,10 @@ supported_languages_url = 'https://api.dailymotion.com/languages'
# do search-request
def request(query, params):
locale = match_language(params['language'], supported_languages)
if params['language'] == 'all':
locale = 'en-US'
else:
locale = match_language(params['language'], supported_languages)
params['url'] = search_url.format(
query=urlencode({'search': query, 'localization': locale}),
......
......@@ -54,6 +54,9 @@ content_xpath = './/a[@class="result__snippet"]'
# match query's language to a region code that duckduckgo will accept
def get_region_code(lang, lang_list=[]):
if lang == 'all':
return None
lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
lang_parts = lang_code.split('-')
......@@ -61,7 +64,6 @@ def get_region_code(lang, lang_list=[]):
return lang_parts[1].lower() + '-' + lang_parts[0].lower()
# do search-request
def request(query, params):
if params['time_range'] and params['time_range'] not in time_range_dict:
return params
......@@ -69,8 +71,12 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
region_code = get_region_code(params['language'], supported_languages)
params['url'] = url.format(
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
if region_code:
params['url'] = url.format(
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
else:
params['url'] = url.format(
query=urlencode({'q': query}), offset=offset, dc_param=offset)
if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
......
......@@ -56,8 +56,12 @@ def request(query, params):
safesearch = params['safesearch'] - 1
region_code = get_region_code(params['language'], lang_list=supported_languages)
params['url'] = images_url.format(
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
if region_code:
params['url'] = images_url.format(
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
else:
params['url'] = images_url.format(
query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
return params
......
......@@ -40,7 +40,10 @@ def request(query, params):
offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
language = params['language'].split('-')[0]
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('-')[0]
# if language is not supported, put it in english
if language != 'en' and\
......
......@@ -50,9 +50,12 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['language'] == 'all':
language = 'xx'
else:
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
......
......@@ -166,7 +166,11 @@ def extract_text_from_dom(result, xpath):
def request(query, params):
offset = (params['pageno'] - 1) * 10
language = match_language(params['language'], supported_languages)
if params['language'] == 'all' or params['language'] == 'en-US':
language = 'en-GB'
else:
language = match_language(params['language'], supported_languages, language_aliases)
language_array = language.split('-')
if params['language'].find('-') > 0:
country = params['language'].split('-')[1]
......@@ -381,10 +385,10 @@ def attributes_to_html(attributes):
def _fetch_supported_languages(resp):
supported_languages = {}
dom = html.fromstring(resp.text)
options = dom.xpath('//table//td/font/label/span')
options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
for option in options:
code = option.xpath('./@id')[0][1:]
name = option.text.title()
code = option.xpath('./@value')[0].split('_')[-1]
name = option.xpath('./@data-name')[0].title()
supported_languages[code] = {"name": name}
return supported_languages
......@@ -51,9 +51,10 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
search_options=urlencode(search_options))
language = match_language(params['language'], supported_languages).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language
if params['language'] != 'all':
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language
return params
......
......@@ -7,7 +7,7 @@
@using-api no
@results HTML
@stable no
@parse url, title, content
@parse url, title, content, thumbnail
"""
from datetime import date, timedelta
......@@ -15,7 +15,7 @@ from json import loads
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
import re
# engine dependent config
categories = ['videos']
......@@ -25,7 +25,7 @@ time_range_support = True
number_of_results = 10
search_url = 'https://www.google.com/search'\
'?{query}'\
'?q={query}'\
'&tbm=vid'\
'&{search_options}'
time_range_attr = "qdr:{range}"
......@@ -69,15 +69,27 @@ def response(resp):
# parse results
for result in dom.xpath('//div[@class="g"]'):
title = extract_text(result.xpath('.//h3/a'))
url = result.xpath('.//h3/a/@href')[0]
title = extract_text(result.xpath('.//h3'))
url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]'))
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
# append result
results.append({'url': url,
'title': title,
'content': content,
'thumbnail': '',
'thumbnail': thumbnail,
'template': 'videos.html'})
return results
......@@ -45,7 +45,10 @@ def request(query, params):
format_strings = list(Formatter().parse(base_url))
language = params['language'].split('-')[0]
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):
......
......@@ -35,9 +35,10 @@ def request(query, params):
search_string.format(query=urlencode({'q': query}),
limit=number_of_results)
language = params['language'].split('-')[0]
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
if params['language'] != 'all':
language = params['language'].split('_')[0]
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
......
......@@ -46,8 +46,9 @@ def request(query, params):
offset=offset)
# add language tag
language = match_language(params['language'], supported_languages)
params['url'] += '&locale=' + language.replace('-', '_').lower()
if params['language'] != 'all':
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()
return params
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment