Commit 2303d070 authored by Gaël Duval's avatar Gaël Duval 🏄🏼
Browse files

Merge branch 'nicofonk/sync-upstream' into 'master'

sync upstream v0.15.0 to get last changes and fixes

See merge request e/cloud/my-spot!17
parents 491dc82f 0cc06a29
...@@ -85,6 +85,21 @@ generally made searx better: ...@@ -85,6 +85,21 @@ generally made searx better:
- Joseph Nuthalapati @josephkiranbabu - Joseph Nuthalapati @josephkiranbabu
- @maiki - @maiki
- Richard Didier @zeph33 - Richard Didier @zeph33
- Michael Vieria @Themimitoof
- Richard Nespithal @rndevfx
- Stanislas @angristan
- @rinpatch
- g. s. @usernameisntallowed
- Léo Bourrel @bourrel
- @cy8aer
- @Popolon
- Alice Ferrazzi @aliceinwire
- @LiquidLemon
- @dadosch
- @Venca24
- @ZEROF
- Ivan Skytte Jørgensen @isj-privacore
- @miicha
People who contributed to Spot: People who contributed to Spot:
......
0.15.0 2019.01.06
=================
- New engines
- Acgsou (files, images, videos, music)
- Duden.de (general)
- Seznam (general)
- Mojeek (general)
- New languages
- Catalan
- Welsh
- Basque
- Persian (Iran)
- Galician
- Dutch (Belgium)
- Telugu
- Vietnamese
- New random answerers
- sha256
- uuidv4
- New DOI resolvers
- sci-hub.tw
- Fix Vim mode on Firefox
- Fix custom select in Oscar theme
- Engine fixes (duckduckgo, google news, currency convert, gigablast, google scholar, wikidata image, etymonline, google videos, startpage, bing image)
- Minor simple theme fixes
- New Youtube icon in Oscar theme
- Get DOI rewriters from settings.yml
- Hide page buttons when infinite scrolling is enabled
- Update user agent versions
- Make Oscar style configurable
- Make suspend times of errored engines configurable
0.14.0 2018.02.19 0.14.0 2018.02.19
================= =================
......
...@@ -32,6 +32,7 @@ RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/ap ...@@ -32,6 +32,7 @@ RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/ap
openssl-dev \ openssl-dev \
ca-certificates \ ca-certificates \
tini@commuedge \ tini@commuedge \
&& pip install --upgrade pip \
&& pip install --no-cache -r requirements.txt \ && pip install --no-cache -r requirements.txt \
&& apk del \ && apk del \
build-base \ build-base \
......
This diff is collapsed.
...@@ -88,9 +88,7 @@ def response(resp): ...@@ -88,9 +88,7 @@ def response(resp):
url = json_data.get('purl') url = json_data.get('purl')
img_src = json_data.get('murl') img_src = json_data.get('murl')
thumbnail = json_data.get('turl')
thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad')))
thumbnail = thumb_json_data.get('turl')
# append result # append result
results.append({'template': 'images.html', results.append({'template': 'images.html',
......
"""
FindX (General, Images, Videos)
@website https://www.findx.com
@provide-api no
@using-api no
@results HTML
@stable no
@parse url, title, content, embedded, img_src, thumbnail_src
"""
from dateutil import parser
from json import loads
import re
from lxml import html
from searx import logger
from searx.engines.xpath import extract_text
from searx.engines.youtube_noapi import base_youtube_url, embedded_url
from searx.url_utils import urlencode
# NOTE(review): presumably read by searx to enable paging for this engine;
# request() forwards params['pageno'] as the "page" query argument.
paging = True
# XPath used by response() to select the <script id="initial-state"> element
# whose text is parsed as JSON.
results_xpath = '//script[@id="initial-state"]'
# URL template filled in by request(): {category} receives a type_map value
# and {q} receives the url-encoded query string.
search_url = 'https://www.findx.com/{category}?{q}'
# Maps params['category'] to the path segment substituted into search_url.
type_map = {
'none': 'web',
'general': 'web',
'images': 'images',
'videos': 'videos',
}
def request(query, params):
    """Store the FindX search URL for *query* in ``params['url']``.

    The path segment is looked up in ``type_map`` under
    ``params['category']``; the query string carries the search terms as
    ``q`` and ``params['pageno']`` as ``page``. The same ``params`` dict is
    returned.
    """
    section = type_map[params['category']]
    query_string = urlencode({
        'q': query,
        'page': params['pageno']
    })
    params['url'] = search_url.format(category=section, q=query_string)
    return params
def response(resp):
    """Parse the FindX page in *resp* and return a list of result dicts.

    The text of the element(s) matched by ``results_xpath`` is decoded as
    JSON. Its ``web``, ``images`` and ``video`` sections are inspected in
    that order and the first non-empty one is converted by the matching
    helper. An empty list is returned when none of them holds results.
    """
    tree = html.fromstring(resp.text)
    state = loads(extract_text(tree.xpath(results_xpath)))

    web_section = state['web']['results']
    if len(web_section) > 0:
        # Web hits sit one level deeper than image and video hits.
        return _general_results(web_section['webSearch']['results'])

    image_hits = state['images']['results']
    if len(image_hits) > 0:
        return _images_results(image_hits)

    video_hits = state['video']['results']
    if len(video_hits) > 0:
        return _videos_results(video_hits)

    return []
def _general_results(general_results):
results = []
for result in general_results:
results.append({
'url': result['url'],
'title': result['title'],
'content': result['sum'],
})
return results
def _images_results(image_results):
    """Convert FindX image hits into result dicts for ``images.html``.

    ``sourceURL`` becomes ``url`` and ``source`` becomes ``content``. The
    thumbnail and full-size image URLs are taken from the hit's ``assets``
    entries and passed through ``_extract_url``.
    """
    return [
        {
            'url': hit['sourceURL'],
            'title': hit['title'],
            'content': hit['source'],
            'thumbnail_src': _extract_url(hit['assets']['thumb']['url']),
            'img_src': _extract_url(hit['assets']['file']['url']),
            'template': 'images.html',
        }
        for hit in image_results
    ]
def _videos_results(video_results):
    """Convert FindX video hits into result dicts for ``videos.html``.

    Only hits whose ``kind`` starts with ``youtube`` are converted; any other
    kind is logged as a warning and skipped. Descriptions longer than 300
    characters are cut to 300 characters followed by ``...``. The
    ``publishedAt`` value is parsed with ``dateutil.parser.parse``.
    """
    results = []

    for result in video_results:
        if not result['kind'].startswith('youtube'):
            # Logger.warn() is a deprecated alias of warning().
            # NOTE(review): assumes `logger` is a stdlib logging.Logger.
            logger.warning('Unknown video kind in findx: %s', result['kind'])
            continue

        description = result['snippet']['description']
        if len(description) > 300:
            description = description[:300] + '...'

        results.append({
            'url': base_youtube_url + result['id'],
            'title': result['snippet']['title'],
            'content': description,
            'thumbnail': _extract_url(result['snippet']['thumbnails']['default']['url']),
            'publishedDate': parser.parse(result['snippet']['publishedAt']),
            'embedded': embedded_url.format(videoid=result['id']),
            'template': 'videos.html',
        })

    return results
def _extract_url(url):
matching = re.search('(/https?://[^)]+)', url)
if matching:
return matching.group(0)[1:]
return ''
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
@using-api no @using-api no
@results HTML @results HTML
@stable no @stable no
@parse url, title, content @parse url, title, content, thumbnail
""" """
from datetime import date, timedelta from datetime import date, timedelta
...@@ -15,7 +15,7 @@ from json import loads ...@@ -15,7 +15,7 @@ from json import loads
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode from searx.url_utils import urlencode
import re
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']
...@@ -25,7 +25,7 @@ time_range_support = True ...@@ -25,7 +25,7 @@ time_range_support = True
number_of_results = 10 number_of_results = 10
search_url = 'https://www.google.com/search'\ search_url = 'https://www.google.com/search'\
'?{query}'\ '?q={query}'\
'&tbm=vid'\ '&tbm=vid'\
'&{search_options}' '&{search_options}'
time_range_attr = "qdr:{range}" time_range_attr = "qdr:{range}"
...@@ -69,15 +69,27 @@ def response(resp): ...@@ -69,15 +69,27 @@ def response(resp):
# parse results # parse results
for result in dom.xpath('//div[@class="g"]'): for result in dom.xpath('//div[@class="g"]'):
title = extract_text(result.xpath('.//h3/a')) title = extract_text(result.xpath('.//h3'))
url = result.xpath('.//h3/a/@href')[0] url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]')) content = extract_text(result.xpath('.//span[@class="st"]'))
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,
'content': content, 'content': content,
'thumbnail': '', 'thumbnail': thumbnail,
'template': 'videos.html'}) 'template': 'videos.html'})
return results return results
...@@ -32,8 +32,9 @@ search_url = base_url + 'do/search' ...@@ -32,8 +32,9 @@ search_url = base_url + 'do/search'
# specific xpath variables # specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct children of div[@id="results"] # not ads: div[@class="result"] are the direct children of div[@id="results"]
results_xpath = '//div[@class="result"]' results_xpath = '//li[contains(@class, "search-result") and contains(@class, "search-item")]'
link_xpath = './/h3/a' link_xpath = './/h3/a'
content_xpath = './p[@class="search-item__body"]'
# do search-request # do search-request
...@@ -73,14 +74,10 @@ def response(resp): ...@@ -73,14 +74,10 @@ def response(resp):
if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
continue continue
# block ixquick search url's
if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
continue
title = extract_text(link) title = extract_text(link)
if result.xpath('./p[@class="desc clk"]'): if result.xpath(content_xpath):
content = extract_text(result.xpath('./p[@class="desc clk"]')) content = extract_text(result.xpath(content_xpath))
else: else:
content = '' content = ''
......
...@@ -226,24 +226,6 @@ engines: ...@@ -226,24 +226,6 @@ engines:
shortcut : fd shortcut : fd
disabled : True disabled : True
- name : findx
engine : findx
shortcut : fx
categories : general
disabled : True
- name : findx images
engine : findx
shortcut : fxi
categories : images
disabled : True
- name : findx videos
engine : findx
shortcut : fxv
categories : videos
disabled : True
- name : flickr - name : flickr
categories : images categories : images
shortcut : fl shortcut : fl
...@@ -605,14 +587,6 @@ engines: ...@@ -605,14 +587,6 @@ engines:
timeout : 6.0 timeout : 6.0
disabled : True disabled : True
- name : ixquick
engine : startpage
base_url : 'https://www.ixquick.eu/'
search_url : 'https://www.ixquick.eu/do/search'
shortcut : iq
timeout : 6.0
disabled : True
- name : swisscows - name : swisscows
engine : swisscows engine : swisscows
shortcut : sw shortcut : sw
...@@ -731,6 +705,33 @@ engines: ...@@ -731,6 +705,33 @@ engines:
shortcut : du shortcut : du
disabled : True disabled : True
- name : seznam
shortcut: szn
engine: xpath
paging : True
search_url : https://search.seznam.cz/?q={query}&count=10&from={pageno}
results_xpath: //div[@class="Page-content"]//div[@class="Result "]
url_xpath : ./h3/a/@href
title_xpath : ./h3
content_xpath : .//p[@class="Result-description"]
first_page_num : 0
page_size : 10
disabled : True
- name : mojeek
shortcut: mjk
engine: xpath
paging : True
search_url : https://www.mojeek.com/search?q={query}&s={pageno}
results_xpath: /html/body//div[@class="results"]/ul[@class="results-standard"]/li
url_xpath : ./h2/a/@href
title_xpath : ./h2
content_xpath : ./p[@class="s"]
suggestion_xpath : /html/body//div[@class="top-info"]/p[@class="top-info spell"]/a
first_page_num : 1
page_size : 10
disabled : True
# - name : yacy # - name : yacy
# engine : yacy # engine : yacy
# shortcut : ya # shortcut : ya
...@@ -749,21 +750,27 @@ locales: ...@@ -749,21 +750,27 @@ locales:
en : English en : English
ar : العَرَبِيَّة (Arabic) ar : العَرَبِيَّة (Arabic)
bg : Български (Bulgarian) bg : Български (Bulgarian)
ca : Català (Catalan)
cs : Čeština (Czech) cs : Čeština (Czech)
cy : Cymraeg (Welsh)
da : Dansk (Danish) da : Dansk (Danish)
de : Deutsch (German) de : Deutsch (German)
el_GR : Ελληνικά (Greek_Greece) el_GR : Ελληνικά (Greek_Greece)
eo : Esperanto (Esperanto) eo : Esperanto (Esperanto)
es : Español (Spanish) es : Español (Spanish)
eu : Euskara (Basque)
fa_IR : (fārsī) فارسى (Persian)
fi : Suomi (Finnish) fi : Suomi (Finnish)
fil : Wikang Filipino (Filipino) fil : Wikang Filipino (Filipino)
fr : Français (French) fr : Français (French)
gl : Galego (Galician)
he : עברית (Hebrew) he : עברית (Hebrew)
hr : Hrvatski (Croatian) hr : Hrvatski (Croatian)
hu : Magyar (Hungarian) hu : Magyar (Hungarian)
it : Italiano (Italian) it : Italiano (Italian)
ja : 日本語 (Japanese) ja : 日本語 (Japanese)
nl : Nederlands (Dutch) nl : Nederlands (Dutch)
nl_BE : Vlaams (Dutch_Belgium)
pl : Polski (Polish) pl : Polski (Polish)
pt : Português (Portuguese) pt : Português (Portuguese)
pt_BR : Português (Portuguese_Brazil) pt_BR : Português (Portuguese_Brazil)
...@@ -773,8 +780,10 @@ locales: ...@@ -773,8 +780,10 @@ locales:
sl : Slovenski (Slovene) sl : Slovenski (Slovene)
sr : српски (Serbian) sr : српски (Serbian)
sv : Svenska (Swedish) sv : Svenska (Swedish)
te : తెలుగు (telugu)
tr : Türkçe (Turkish) tr : Türkçe (Turkish)
uk : українська мова (Ukrainian) uk : українська мова (Ukrainian)
vi : tiếng việt (㗂越)
zh : 中文 (Chinese) zh : 中文 (Chinese)
zh_TW : 國語 (Taiwanese Mandarin) zh_TW : 國語 (Taiwanese Mandarin)
......
...@@ -3,14 +3,15 @@ ...@@ -3,14 +3,15 @@
# This file is distributed under the same license as the PROJECT project. # This file is distributed under the same license as the PROJECT project.
# #
# Translators: # Translators:
# ButterflyOfFire <ButterflyOfFire@protonmail.com>, 2017-2018 # ButterflyOfFire ButterflyOfFire, 2018
# ButterflyOfFire, 2017-2018
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: searx\n" "Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2017-11-01 21:31+0100\n" "POT-Creation-Date: 2017-11-01 21:31+0100\n"
"PO-Revision-Date: 2018-01-23 17:54+0000\n" "PO-Revision-Date: 2018-09-08 08:23+0000\n"
"Last-Translator: ButterflyOfFire <ButterflyOfFire@protonmail.com>\n" "Last-Translator: ButterflyOfFire ButterflyOfFire\n"
"Language-Team: Arabic (http://www.transifex.com/asciimoo/searx/language/ar/)\n" "Language-Team: Arabic (http://www.transifex.com/asciimoo/searx/language/ar/)\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
...@@ -29,7 +30,7 @@ msgstr "" ...@@ -29,7 +30,7 @@ msgstr ""
#: searx/search.py:151 #: searx/search.py:151
msgid "unexpected crash" msgid "unexpected crash"
msgstr "" msgstr "خلل غير مُتوقّع"
#: searx/webapp.py:136 #: searx/webapp.py:136
msgid "files" msgid "files"
...@@ -73,7 +74,7 @@ msgstr "علوم" ...@@ -73,7 +74,7 @@ msgstr "علوم"
#: searx/webapp.py:399 searx/webapp.py:658 #: searx/webapp.py:399 searx/webapp.py:658
msgid "Invalid settings, please edit your preferences" msgid "Invalid settings, please edit your preferences"
msgstr "" msgstr "إنّ الإعدادات خاطئة، يرجى تعديل خياراتك"
#: searx/webapp.py:415 #: searx/webapp.py:415
msgid "Invalid settings" msgid "Invalid settings"
...@@ -97,7 +98,7 @@ msgstr "مولّد قيمة عشوائية" ...@@ -97,7 +98,7 @@ msgstr "مولّد قيمة عشوائية"
#: searx/answerers/random/answerer.py:54 #: searx/answerers/random/answerer.py:54
msgid "Generate different random values" msgid "Generate different random values"
msgstr "" msgstr "توليد قِيم عشوائية مختلفة"
#: searx/answerers/statistics/answerer.py:53 #: searx/answerers/statistics/answerer.py:53
msgid "Statistics functions" msgid "Statistics functions"
...@@ -288,7 +289,7 @@ msgstr "اللغة الإفتراضية" ...@@ -288,7 +289,7 @@ msgstr "اللغة الإفتراضية"
#: searx/templates/pix-art/preferences.html:20 #: searx/templates/pix-art/preferences.html:20
#: searx/templates/simple/preferences.html:120 #: searx/templates/simple/preferences.html:120
msgid "Interface language" msgid "Interface language"
msgstr "لغة العرض" msgstr "لغة الواجهة"
#: searx/templates/courgette/preferences.html:34 #: searx/templates/courgette/preferences.html:34
#: searx/templates/legacy/preferences.html:35 #: searx/templates/legacy/preferences.html:35
...@@ -371,7 +372,7 @@ msgstr "اللون" ...@@ -371,7 +372,7 @@ msgstr "اللون"
#: searx/templates/courgette/preferences.html:86 #: searx/templates/courgette/preferences.html:86
msgid "Blue (default)" msgid "Blue (default)"
msgstr "أزرق )إفتراضي(" msgstr "أزرق (إفتراضي)"
#: searx/templates/courgette/preferences.html:87 #: searx/templates/courgette/preferences.html:87
msgid "Violet" msgid "Violet"
...@@ -581,13 +582,13 @@ msgstr "عرض نتائج البحث في ألسنة جديدة" ...@@ -581,13 +582,13 @@ msgstr "عرض نتائج البحث في ألسنة جديدة"
#: searx/templates/oscar/preferences.html:117 #: searx/templates/oscar/preferences.html:117
#: searx/templates/simple/preferences.html:145 #: searx/templates/simple/preferences.html:145
msgid "On" msgid "On"
msgstr "" msgstr "يشتغل"
#: searx/templates/legacy/preferences.html:88 #: searx/templates/legacy/preferences.html:88
#: searx/templates/oscar/preferences.html:118 #: searx/templates/oscar/preferences.html:118
#: searx/templates/simple/preferences.html:146 #: searx/templates/simple/preferences.html:146
msgid "Off" msgid "Off"
msgstr "" msgstr "مُعطَّل"
#: searx/templates/legacy/result_templates/code.html:3 #: searx/templates/legacy/result_templates/code.html:3
#: searx/templates/legacy/result_templates/default.html:3 #: searx/templates/legacy/result_templates/default.html:3
...@@ -626,7 +627,7 @@ msgstr "محرك بحث يحمي الخصوصية و قابل للتهكير" ...@@ -626,7 +627,7 @@ msgstr "محرك بحث يحمي الخصوصية و قابل للتهكير"
#: searx/templates/oscar/macros.html:36 searx/templates/oscar/macros.html:50 #: searx/templates/oscar/macros.html:36 searx/templates/oscar/macros.html:50
#: searx/templates/simple/macros.html:43 #: searx/templates/simple/macros.html:43
msgid "proxied" msgid "proxied"
msgstr "يمر عبر البروكسي" msgstr "النفاذ عبر البروكسي"
#: searx/templates/oscar/macros.html:92 #: searx/templates/oscar/macros.html:92
msgid "supported" msgid "supported"
...@@ -661,7 +662,7 @@ msgstr "المجيبون" ...@@ -661,7 +662,7 @@ msgstr "المجيبون"
#: searx/templates/oscar/preferences.html:17 #: searx/templates/oscar/preferences.html:17
#: searx/templates/oscar/preferences.html:272 #: searx/templates/oscar/preferences.html:272
msgid "Cookies" msgid "Cookies"
msgstr "الكوكيز" msgstr "كعكات الكوكيز"
#: searx/templates/oscar/preferences.html:42 #: searx/templates/oscar/preferences.html:42
#: searx/templates/simple/preferences.html:48 #: searx/templates/simple/preferences.html:48
...@@ -676,12 +677,12 @@ msgstr "يقوم بتغيير لغة واجهة البحث" ...@@ -676,12 +677,12 @@ msgstr "يقوم بتغيير لغة واجهة البحث"
#: searx/templates/oscar/preferences.html:58