Unverified commit 25367cfb, authored Jan 15, 2019 by Noémi Ványi, committed by GitHub on Jan 15, 2019

Merge branch 'master' into setup-no-tests

parents 4977ea54 383e3cc5
Changes 138
AUTHORS.rst
...
...
@@ -83,3 +83,18 @@ generally made searx better:
 - Joseph Nuthalapati @josephkiranbabu
 - @maiki
 - Richard Didier @zeph33
+- Michael Vieria @Themimitoof
+- Richard Nespithal @rndevfx
+- Stanislas @angristan
+- @rinpatch
+- g. s. @usernameisntallowed
+- Léo Bourrel @bourrel
+- @cy8aer
+- @Popolon
+- Alice Ferrazzi @aliceinwire
+- @LiquidLemon
+- @dadosch
+- @Venca24
+- @ZEROF
+- Ivan Skytte Jørgensen @isj-privacore
+- @miicha
CHANGELOG.rst
0.15.0 2019.01.06
=================

- New engines

  - Acgsou (files, images, videos, music)
  - Duden.de (general)
  - Seznam (general)
  - Mojeek (general)

- New languages

  - Catalan
  - Welsh
  - Basque
  - Persian (Iran)
  - Galician
  - Dutch (Belgium)
  - Telugu
  - Vietnamese

- New random answerers

  - sha256
  - uuidv4

- New DOI resolvers

  - sci-hub.tw

- Fix Vim mode on Firefox
- Fix custom select in Oscar theme
- Engine fixes (duckduckgo, google news, currency convert, gigablast, google scholar, wikidata image, etymonline, google videos, startpage, bing image)
- Minor simple theme fixes
- New Youtube icon in Oscar theme
- Get DOI rewriters from settings.yml
- Hide page buttons when infinite scrolling is enabled
- Update user agent versions
- Make Oscar style configurable
- Make suspend times of errored engines configurable


0.14.0 2018.02.19
=================
...
...
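For context on the "random answerers" entry above: a rough sketch (not searx's actual answerer module; the variable names are illustrative) of the kind of value each new answerer returns, using only the standard library.

    import hashlib
    import os
    import uuid

    # a random sha256 digest, comparable to what the sha256 answerer prints
    random_sha256 = hashlib.sha256(os.urandom(64)).hexdigest()
    # a random version-4 UUID, comparable to what the uuidv4 answerer prints
    random_uuid = str(uuid.uuid4())

    print(random_sha256)  # 64 hex characters
    print(random_uuid)    # e.g. 'f47ac10b-58cc-4372-a567-0e02b2c3d479'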
Dockerfile
...
...
@@ -32,6 +32,7 @@ RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/ap
     openssl-dev \
     ca-certificates \
+    tini@commuedge \
  && pip install --upgrade pip \
  && pip install --no-cache -r requirements.txt \
  && apk del \
     build-base \
...
...
searx/data/engines_languages.json
This diff is collapsed.
searx/engines/__init__.py
...
...
@@ -113,8 +113,7 @@ def load_engine(engine_data):
                iso_lang not in getattr(engine, 'supported_languages'):
             language_aliases[iso_lang] = engine_lang

-        if language_aliases:
-            setattr(engine, 'language_aliases', language_aliases)
+        setattr(engine, 'language_aliases', language_aliases)

     # assign language fetching method if auxiliary method exists
     if hasattr(engine, '_fetch_supported_languages'):
...
...
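The hunk above now stores the alias map unconditionally as engine.language_aliases (a standardized ISO code mapped to the code the engine itself lists), and several engine hunks below pass it straight into match_language(). As a hedged illustration only — searx's real match_language() lives in searx/utils.py and is more involved — here is a stand-in showing the role the alias dict plays; names and the example codes are illustrative.

    # illustrative stand-in, not searx's match_language()
    def resolve_engine_language(user_lang, supported_languages, language_aliases, fallback='en-US'):
        if user_lang in supported_languages:
            return user_lang
        if user_lang in language_aliases:
            # e.g. a standardized 'nb-NO' that the engine lists as 'no'
            return language_aliases[user_lang]
        return fallback

    print(resolve_engine_language('nb-NO', ['en', 'de', 'no'], {'nb-NO': 'no'}))  # -> 'no'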
searx/engines/archlinux.py
...
...
@@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
 # wikis for some languages were moved off from the main site, we need to make
 # requests to correct URLs to be able to get results in those languages
 lang_urls = {
-    'en': {
+    'all': {
         'base': 'https://wiki.archlinux.org',
         'search': '/index.php?title=Special:Search&offset={offset}&{query}'
     },
...
...
@@ -67,7 +67,7 @@ lang_urls = {
 def get_lang_urls(language):
     if language in lang_urls:
         return lang_urls[language]
-    return lang_urls['en']
+    return lang_urls['all']


 # Language names to build search requests for
...
...
searx/engines/bing.py
...
...
@@ -34,7 +34,10 @@ search_string = 'search?{query}&first={offset}'
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    lang = match_language(params['language'], supported_languages, language_aliases)
+    if params['language'] == 'all':
+        lang = 'EN'
+    else:
+        lang = match_language(params['language'], supported_languages, language_aliases)

     query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
...
...
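The same pattern — special-casing params['language'] == 'all' before falling back to match_language() — reappears in the bing_news, dailymotion, faroo, gigablast, google, google_news and mediawiki hunks below. As a worked example of the query string this request() builds when the user selects 'all' (the query value below is illustrative):

    lang = 'EN'               # the 'all' fallback chosen in the hunk above
    query = b'free software'  # raw query bytes, as the engine receives them
    bing_query = u'language:{} {}'.format(lang.split('-')[0].upper(),
                                          query.decode('utf-8')).encode('utf-8')
    print(bing_query)         # -> b'language:EN free software'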
searx/engines/bing_images.py
...
...
@@ -55,7 +55,7 @@ def request(query, params):
         query=urlencode({'q': query}),
         offset=offset)

-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()

     params['cookies']['SRCHHPGUSR'] = \
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
...
...
@@ -88,9 +88,7 @@ def response(resp):
         url = json_data.get('purl')
         img_src = json_data.get('murl')
-
-        thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad')))
-        thumbnail = thumb_json_data.get('turl')
+        thumbnail = json_data.get('turl')

         # append result
         results.append({'template': 'images.html',
...
...
searx/engines/bing_news.py
...
...
@@ -71,7 +71,10 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    language = match_language(params['language'], supported_languages, language_aliases)
+    if params['language'] == 'all':
+        language = 'en-US'
+    else:
+        language = match_language(params['language'], supported_languages, language_aliases)

     params['url'] = _get_url(query, language, offset, params['time_range'])
...
...
searx/engines/bing_videos.py
...
...
@@ -48,7 +48,7 @@ def request(query, params):
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

     # language cookie
-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()
     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'

     # query and paging
...
...
searx/engines/dailymotion.py
...
...
@@ -33,7 +33,10 @@ supported_languages_url = 'https://api.dailymotion.com/languages'

 # do search-request
 def request(query, params):
-    locale = match_language(params['language'], supported_languages)
+    if params['language'] == 'all':
+        locale = 'en-US'
+    else:
+        locale = match_language(params['language'], supported_languages)

     params['url'] = search_url.format(
         query=urlencode({'search': query, 'localization': locale}),
...
...
searx/engines/duckduckgo.py
...
...
@@ -54,6 +54,9 @@ content_xpath = './/a[@class="result__snippet"]'

 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=[]):
+    if lang == 'all':
+        return None
+
     lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
     lang_parts = lang_code.split('-')
...
...
@@ -61,7 +64,6 @@ def get_region_code(lang, lang_list=[]):
     return lang_parts[1].lower() + '-' + lang_parts[0].lower()

-
 # do search-request
 def request(query, params):
     if params['time_range'] and params['time_range'] not in time_range_dict:
         return params
...
...
@@ -69,8 +71,12 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 30

     region_code = get_region_code(params['language'], supported_languages)
-    params['url'] = url.format(
-        query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
+    if region_code:
+        params['url'] = url.format(
+            query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
+    else:
+        params['url'] = url.format(
+            query=urlencode({'q': query}), offset=offset, dc_param=offset)

     if params['time_range'] in time_range_dict:
         params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
...
...
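A short sketch of the shape request() now expects from get_region_code(): DuckDuckGo's kl parameter is country-then-language, so a matched locale like 'en-US' becomes 'us-en', and 'all' yields None, in which case the hunk above omits kl entirely. The helper below is a simplified stand-in that skips the match_language() step.

    def region_code_for(lang_code):
        # assume lang_code is already a matched 'xx-YY' locale, or the literal 'all'
        if lang_code == 'all':
            return None
        lang_parts = lang_code.split('-')
        return lang_parts[1].lower() + '-' + lang_parts[0].lower()

    print(region_code_for('en-US'))  # -> 'us-en'
    print(region_code_for('all'))    # -> None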
searx/engines/duckduckgo_images.py
...
...
@@ -56,8 +56,12 @@ def request(query, params):
     safesearch = params['safesearch'] - 1

     region_code = get_region_code(params['language'], lang_list=supported_languages)
-    params['url'] = images_url.format(
-        query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
+    if region_code:
+        params['url'] = images_url.format(
+            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
+    else:
+        params['url'] = images_url.format(
+            query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)

     return params
...
...
searx/engines/faroo.py
...
...
@@ -40,7 +40,10 @@ def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results + 1
     categorie = search_category.get(params['category'], 'web')

-    language = params['language'].split('-')[0]
+    if params['language'] == 'all':
+        language = 'en'
+    else:
+        language = params['language'].split('-')[0]

     # if language is not supported, put it in english
     if language != 'en' and \
...
...
searx/engines/findx.py
deleted 100644 → 0
"""
FindX (General, Images, Videos)
@website https://www.findx.com
@provide-api no
@using-api no
@results HTML
@stable no
@parse url, title, content, embedded, img_src, thumbnail_src
"""
from
dateutil
import
parser
from
json
import
loads
import
re
from
lxml
import
html
from
searx
import
logger
from
searx.engines.xpath
import
extract_text
from
searx.engines.youtube_noapi
import
base_youtube_url
,
embedded_url
from
searx.url_utils
import
urlencode
paging
=
True
results_xpath
=
'//script[@id="initial-state"]'
search_url
=
'https://www.findx.com/{category}?{q}'
type_map
=
{
'none'
:
'web'
,
'general'
:
'web'
,
'images'
:
'images'
,
'videos'
:
'videos'
,
}
def
request
(
query
,
params
):
params
[
'url'
]
=
search_url
.
format
(
category
=
type_map
[
params
[
'category'
]],
q
=
urlencode
({
'q'
:
query
,
'page'
:
params
[
'pageno'
]
})
)
return
params
def
response
(
resp
):
dom
=
html
.
fromstring
(
resp
.
text
)
results_raw_json
=
dom
.
xpath
(
results_xpath
)
results_json
=
loads
(
extract_text
(
results_raw_json
))
if
len
(
results_json
[
'web'
][
'results'
])
>
0
:
return
_general_results
(
results_json
[
'web'
][
'results'
][
'webSearch'
][
'results'
])
if
len
(
results_json
[
'images'
][
'results'
])
>
0
:
return
_images_results
(
results_json
[
'images'
][
'results'
])
if
len
(
results_json
[
'video'
][
'results'
])
>
0
:
return
_videos_results
(
results_json
[
'video'
][
'results'
])
return
[]
def
_general_results
(
general_results
):
results
=
[]
for
result
in
general_results
:
results
.
append
({
'url'
:
result
[
'url'
],
'title'
:
result
[
'title'
],
'content'
:
result
[
'sum'
],
})
return
results
def
_images_results
(
image_results
):
results
=
[]
for
result
in
image_results
:
results
.
append
({
'url'
:
result
[
'sourceURL'
],
'title'
:
result
[
'title'
],
'content'
:
result
[
'source'
],
'thumbnail_src'
:
_extract_url
(
result
[
'assets'
][
'thumb'
][
'url'
]),
'img_src'
:
_extract_url
(
result
[
'assets'
][
'file'
][
'url'
]),
'template'
:
'images.html'
,
})
return
results
def
_videos_results
(
video_results
):
results
=
[]
for
result
in
video_results
:
if
not
result
[
'kind'
].
startswith
(
'youtube'
):
logger
.
warn
(
'Unknown video kind in findx: {}'
.
format
(
result
[
'kind'
]))
continue
description
=
result
[
'snippet'
][
'description'
]
if
len
(
description
)
>
300
:
description
=
description
[:
300
]
+
'...'
results
.
append
({
'url'
:
base_youtube_url
+
result
[
'id'
],
'title'
:
result
[
'snippet'
][
'title'
],
'content'
:
description
,
'thumbnail'
:
_extract_url
(
result
[
'snippet'
][
'thumbnails'
][
'default'
][
'url'
]),
'publishedDate'
:
parser
.
parse
(
result
[
'snippet'
][
'publishedAt'
]),
'embedded'
:
embedded_url
.
format
(
videoid
=
result
[
'id'
]),
'template'
:
'videos.html'
,
})
return
results
def
_extract_url
(
url
):
matching
=
re
.
search
(
'(/https?://[^)]+)'
,
url
)
if
matching
:
return
matching
.
group
(
0
)[
1
:]
return
''
searx/engines/gigablast.py
...
...
@@ -50,9 +50,12 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
 def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results

-    language = params['language'].replace('-', '_').lower()
-    if language.split('-')[0] != 'zh':
-        language = language.split('-')[0]
+    if params['language'] == 'all':
+        language = 'xx'
+    else:
+        language = params['language'].replace('-', '_').lower()
+        if language.split('-')[0] != 'zh':
+            language = language.split('-')[0]

     if params['safesearch'] >= 1:
         safesearch = 1
...
...
searx/engines/google.py
...
...
@@ -166,7 +166,11 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10

-    language = match_language(params['language'], supported_languages)
+    if params['language'] == 'all' or params['language'] == 'en-US':
+        language = 'en-GB'
+    else:
+        language = match_language(params['language'], supported_languages, language_aliases)
+
     language_array = language.split('-')
     if params['language'].find('-') > 0:
         country = params['language'].split('-')[1]
...
...
@@ -381,10 +385,10 @@ def attributes_to_html(attributes):
 def _fetch_supported_languages(resp):
     supported_languages = {}
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//table//td/font/label/span')
+    options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
     for option in options:
-        code = option.xpath('./@id')[0][1:]
-        name = option.text.title()
+        code = option.xpath('./@value')[0].split('_')[-1]
+        name = option.xpath('./@data-name')[0].title()
         supported_languages[code] = {"name": name}

     return supported_languages
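The rewritten _fetch_supported_languages() above scrapes Google's advanced-search language picker instead of the old table layout. A minimal sketch of what those xpaths expect, run against a synthetic HTML fragment (the real page markup is an assumption inferred from the xpaths in the hunk):

    from lxml import html

    snippet = '''
    <div id="langSec">
      <input name="lr" value="lang_en" data-name="english">
      <input name="lr" value="lang_zh-TW" data-name="chinese (traditional)">
    </div>
    '''
    dom = html.fromstring(snippet)
    supported_languages = {}
    for option in dom.xpath('//*[@id="langSec"]//input[@name="lr"]'):
        code = option.xpath('./@value')[0].split('_')[-1]  # 'lang_en' -> 'en'
        name = option.xpath('./@data-name')[0].title()     # 'english' -> 'English'
        supported_languages[code] = {"name": name}
    print(supported_languages)
    # {'en': {'name': 'English'}, 'zh-TW': {'name': 'Chinese (Traditional)'}}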
searx/engines/google_news.py
...
...
@@ -51,9 +51,10 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       search_options=urlencode(search_options))

-    language = match_language(params['language'], supported_languages).split('-')[0]
-    if language:
-        params['url'] += '&lr=lang_' + language
+    if params['language'] != 'all':
+        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+        if language:
+            params['url'] += '&lr=lang_' + language

     return params
...
...
searx/engines/google_videos.py
...
...
@@ -7,7 +7,7 @@
 @using-api   no
 @results     HTML
 @stable      no
-@parse       url, title, content
+@parse       url, title, content, thumbnail
 """

 from datetime import date, timedelta
...
...
@@ -15,7 +15,7 @@ from json import loads
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
+import re

-
 # engine dependent config
 categories = ['videos']
...
...
@@ -25,7 +25,7 @@ time_range_support = True
 number_of_results = 10

 search_url = 'https://www.google.com/search'\
-    '?{query}'\
+    '?q={query}'\
     '&tbm=vid'\
     '&{search_options}'
 time_range_attr = "qdr:{range}"
...
@@ -69,15 +69,27 @@ def response(resp):
# parse results
for
result
in
dom
.
xpath
(
'//div[@class="g"]'
):
title
=
extract_text
(
result
.
xpath
(
'.//h3
/a
'
))
url
=
result
.
xpath
(
'.//
h3
/a/@href'
)[
0
]
title
=
extract_text
(
result
.
xpath
(
'.//h3'
))
url
=
result
.
xpath
(
'.//
div[@class="r"]
/a/@href'
)[
0
]
content
=
extract_text
(
result
.
xpath
(
'.//span[@class="st"]'
))
# get thumbnails
script
=
str
(
dom
.
xpath
(
'//script[contains(., "_setImagesSrc")]'
)[
0
].
text
)
id
=
result
.
xpath
(
'.//div[@class="s"]//img/@id'
)[
0
]
thumbnails_data
=
re
.
findall
(
's=
\'
(.*?)(?:
\\\\
[a-z,1-9,
\\\\
]+
\'
|
\'
)\;var ii=\[(?:|[
\'
vidthumb\d+
\'
,]+)
\'
'
+
id
,
script
)
tmp
=
[]
if
len
(
thumbnails_data
)
!=
0
:
tmp
=
re
.
findall
(
'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)'
,
thumbnails_data
[
0
])
thumbnail
=
''
if
len
(
tmp
)
!=
0
:
thumbnail
=
tmp
[
-
1
]
# append result
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'content'
:
content
,
'thumbnail'
:
''
,
'thumbnail'
:
thumbnail
,
'template'
:
'videos.html'
})
return
results
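The thumbnail logic added above relies on Google inlining thumbnails as base64 data URIs inside a "_setImagesSrc" script and assigning them to per-result image ids. A self-contained sketch of that recovery step, using a synthetic script string and a simplified pattern (the engine's own regex, shown in the hunk, is more permissive):

    import re

    # synthetic example of the inline script shipped with video results
    script = ("function _setImagesSrc(){var s='data:image/jpeg;base64,/9j/4AAQSkZJRg==';"
              "var ii=['vidthumb1'];}")
    img_id = 'vidthumb1'   # taken from the result's <img> id, as in the hunk

    match = re.search(r"s='(data:image/jpeg;base64,[^']+)';var ii=\['" + re.escape(img_id) + r"'\]",
                      script)
    thumbnail = match.group(1) if match else ''
    print(thumbnail)  # -> data:image/jpeg;base64,/9j/4AAQSkZJRg==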
searx/engines/mediawiki.py
...
...
@@ -45,7 +45,10 @@ def request(query, params):
     format_strings = list(Formatter().parse(base_url))

-    language = params['language'].split('-')[0]
+    if params['language'] == 'all':
+        language = 'en'
+    else:
+        language = params['language'].split('-')[0]

     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
     if any(x[1] == 'language' for x in format_strings):
...
...
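The format_strings check that the hunk above branches on comes from string.Formatter: parsing the configured base_url yields tuples whose second element is the replacement-field name, which is how the engine detects a {language} placeholder. A small demonstration with a hypothetical base_url matching the comment in the hunk:

    from string import Formatter

    base_url = 'https://{language}.wikipedia.org/'   # hypothetical engine setting
    format_strings = list(Formatter().parse(base_url))
    print(format_strings)
    # [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
    print(any(x[1] == 'language' for x in format_strings))  # True -> substitute the language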