Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
e
infra
spot
Commits
1ea56576
Unverified
Commit
1ea56576
authored
Jan 04, 2019
by
Noémi Ványi
Committed by
GitHub
Jan 04, 2019
Browse files
Merge branch 'master' into devel_google_videos
parents
0e493db2
899ba5d6
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Dockerfile
View file @
1ea56576
...
...
@@ -32,6 +32,7 @@ RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/ap
openssl-dev
\
ca-certificates
\
tini@commuedge
\
&&
pip
install
--upgrade
pip
\
&&
pip
install
--no-cache
-r
requirements.txt
\
&&
apk del
\
build-base
\
...
...
searx/data/engines_languages.json
View file @
1ea56576
This diff is collapsed.
Click to expand it.
searx/engines/bing_images.py
View file @
1ea56576
...
...
@@ -88,9 +88,7 @@ def response(resp):
url
=
json_data
.
get
(
'purl'
)
img_src
=
json_data
.
get
(
'murl'
)
thumb_json_data
=
loads
(
_quote_keys_regex
.
sub
(
r
'\1"\2": \3'
,
link
.
attrib
.
get
(
'mad'
)))
thumbnail
=
thumb_json_data
.
get
(
'turl'
)
thumbnail
=
json_data
.
get
(
'turl'
)
# append result
results
.
append
({
'template'
:
'images.html'
,
...
...
searx/engines/findx.py
deleted
100644 → 0
View file @
0e493db2
"""
FindX (General, Images, Videos)
@website https://www.findx.com
@provide-api no
@using-api no
@results HTML
@stable no
@parse url, title, content, embedded, img_src, thumbnail_src
"""
from
dateutil
import
parser
from
json
import
loads
import
re
from
lxml
import
html
from
searx
import
logger
from
searx.engines.xpath
import
extract_text
from
searx.engines.youtube_noapi
import
base_youtube_url
,
embedded_url
from
searx.url_utils
import
urlencode
# engine dependent config
paging = True

# the search results are embedded as JSON inside this <script> element
results_xpath = '//script[@id="initial-state"]'

# {category} is the findx section, {q} the already url-encoded query string
search_url = 'https://www.findx.com/{category}?{q}'

# maps a searx category name to the findx section used in the URL path
type_map = dict(
    none='web',
    general='web',
    images='images',
    videos='videos',
)
def request(query, params):
    """Set ``params['url']`` to the findx search URL and return ``params``.

    The URL path section is looked up in ``type_map`` by
    ``params['category']``; the query string carries ``query`` as ``q`` and
    ``params['pageno']`` as ``page``.
    """
    # resolve the section first so a bad category fails before encoding
    section = type_map[params['category']]
    query_string = urlencode({'q': query, 'page': params['pageno']})

    params['url'] = search_url.format(category=section, q=query_string)
    return params
def response(resp):
    """Parse a findx result page and return the list of result dicts.

    The page state is read as JSON from the element(s) matched by
    ``results_xpath``.  The 'web', 'images' and 'video' sections are checked
    in that order and the first non-empty one is converted; an empty list is
    returned when all three are empty.
    """
    state_nodes = html.fromstring(resp.text).xpath(results_xpath)
    state = loads(extract_text(state_nodes))

    # each section is only looked up once the previous one turned out empty
    web_hits = state['web']['results']
    if len(web_hits) > 0:
        return _general_results(web_hits['webSearch']['results'])

    image_hits = state['images']['results']
    if len(image_hits) > 0:
        return _images_results(image_hits)

    video_hits = state['video']['results']
    if len(video_hits) > 0:
        return _videos_results(video_hits)

    return []
def
_general_results
(
general_results
):
results
=
[]
for
result
in
general_results
:
results
.
append
({
'url'
:
result
[
'url'
],
'title'
:
result
[
'title'
],
'content'
:
result
[
'sum'
],
})
return
results
def _images_results(image_results):
    """Convert findx image results into searx result dicts.

    The thumbnail and full image addresses are taken from
    ``assets.thumb.url`` and ``assets.file.url`` and passed through
    ``_extract_url``.  Every result uses the 'images.html' template.
    """
    return [
        {
            'url': entry['sourceURL'],
            'title': entry['title'],
            'content': entry['source'],
            'thumbnail_src': _extract_url(entry['assets']['thumb']['url']),
            'img_src': _extract_url(entry['assets']['file']['url']),
            'template': 'images.html',
        }
        for entry in image_results
    ]
def _videos_results(video_results):
    """Convert findx video results into searx result dicts.

    Entries whose 'kind' does not start with 'youtube' are logged and
    skipped.  Descriptions longer than 300 characters are cut to 300
    characters followed by '...'.  Every result uses the 'videos.html'
    template.
    """
    results = []

    for result in video_results:
        if not result['kind'].startswith('youtube'):
            # Logger.warn is a deprecated alias; use warning() with lazy
            # %-style arguments.
            # NOTE(review): assumes `logger` is a stdlib logging.Logger - confirm
            logger.warning('Unknown video kind in findx: %s', result['kind'])
            continue

        snippet = result['snippet']

        description = snippet['description']
        if len(description) > 300:
            description = description[:300] + '...'

        results.append({
            'url': base_youtube_url + result['id'],
            'title': snippet['title'],
            'content': description,
            'thumbnail': _extract_url(snippet['thumbnails']['default']['url']),
            'publishedDate': parser.parse(snippet['publishedAt']),
            'embedded': embedded_url.format(videoid=result['id']),
            'template': 'videos.html',
        })

    return results
def
_extract_url
(
url
):
matching
=
re
.
search
(
'(/https?://[^)]+)'
,
url
)
if
matching
:
return
matching
.
group
(
0
)[
1
:]
return
''
searx/engines/startpage.py
View file @
1ea56576
...
...
@@ -32,8 +32,9 @@ search_url = base_url + 'do/search'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct children of div[@id="results"]
results_xpath
=
'//
div[@class="result"
]'
results_xpath
=
'//
li[contains(@class, "search-result") and contains(@class, "search-item")
]'
link_xpath
=
'.//h3/a'
content_xpath
=
'./p[@class="search-item__body"]'
# do search-request
...
...
@@ -73,14 +74,10 @@ def response(resp):
if
re
.
match
(
r
"^http(s|)://(www\.)?startpage\.com/do/search\?.*$"
,
url
):
continue
# block ixquick search url's
if
re
.
match
(
r
"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$"
,
url
):
continue
title
=
extract_text
(
link
)
if
result
.
xpath
(
'./p[@class="desc clk"]'
):
content
=
extract_text
(
result
.
xpath
(
'./p[@class="desc clk"]'
))
if
result
.
xpath
(
content_xpath
):
content
=
extract_text
(
result
.
xpath
(
content_xpath
))
else
:
content
=
''
...
...
searx/settings.yml
View file @
1ea56576
...
...
@@ -218,24 +218,6 @@ engines:
shortcut
:
fd
disabled
:
True
-
name
:
findx
engine
:
findx
shortcut
:
fx
categories
:
general
disabled
:
True
-
name
:
findx images
engine
:
findx
shortcut
:
fxi
categories
:
images
disabled
:
True
-
name
:
findx videos
engine
:
findx
shortcut
:
fxv
categories
:
videos
disabled
:
True
-
name
:
flickr
categories
:
images
shortcut
:
fl
...
...
@@ -597,14 +579,6 @@ engines:
timeout
:
6.0
disabled
:
True
-
name
:
ixquick
engine
:
startpage
base_url
:
'
https://www.ixquick.eu/'
search_url
:
'
https://www.ixquick.eu/do/search'
shortcut
:
iq
timeout
:
6.0
disabled
:
True
-
name
:
swisscows
engine
:
swisscows
shortcut
:
sw
...
...
@@ -723,6 +697,19 @@ engines:
shortcut
:
du
disabled
:
True
-
name
:
seznam
shortcut
:
szn
engine
:
xpath
paging
:
True
search_url
:
https://search.seznam.cz/?q={query}&count=10&from={pageno}
results_xpath
:
//div[@class="Page-content"]//div[@class="Result "]
url_xpath
:
./h3/a/@href
title_xpath
:
./h3
content_xpath
:
.//p[@class="Result-description"]
first_page_num
:
0
page_size
:
10
disabled
:
True
# - name : yacy
# engine : yacy
# shortcut : ya
...
...
tests/unit/engines/test_bing_images.py
View file @
1ea56576
...
...
@@ -52,7 +52,7 @@ class TestBingImagesEngine(SearxTestCase):
<li>
<div>
<div class="imgpt">
<a m='{"purl":"page_url","murl":"img_url"
}' mad='{
"turl":"thumb_url"}'>
<a m='{"purl":"page_url","murl":"img_url"
,
"turl":"thumb_url"}'>
<img src="" alt="alt text" />
</a>
</div>
...
...
@@ -60,7 +60,7 @@ class TestBingImagesEngine(SearxTestCase):
</div>
<div>
<div class="imgpt">
<a m='{"purl":"page_url2","murl":"img_url2"
}' mad='{
"turl":"thumb_url2"}'>
<a m='{"purl":"page_url2","murl":"img_url2"
,
"turl":"thumb_url2"}'>
<img src="" alt="alt text 2" />
</a>
</div>
...
...
@@ -71,7 +71,7 @@ class TestBingImagesEngine(SearxTestCase):
<li>
<div>
<div class="imgpt">
<a m='{"purl":"page_url3","murl":"img_url3"
}' mad='{
"turl":"thumb_url3"}'>
<a m='{"purl":"page_url3","murl":"img_url3"
,
"turl":"thumb_url3"}'>
<img src="" alt="alt text 3" />
</a>
</div>
...
...
tests/unit/engines/test_startpage.py
View file @
1ea56576
...
...
@@ -31,14 +31,14 @@ class TestStartpageEngine(SearxTestCase):
self
.
assertEqual
(
startpage
.
response
(
response
),
[])
html
=
"""
<
div
class=
'
result
'
s
tyle=' *width : auto; *margin-right : 10%;'
>
<
li
class=
"search-
result s
earch-item"
>
<h3>
<a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
This should be the title
</a>
<span id='title_stars_2' name='title_stars_2'> </span>
</h3>
<p class=
'desc clk'
>
<p class=
"search-item__body"
>
This should be the content.
</p>
<p>
...
...
@@ -56,7 +56,7 @@ class TestStartpageEngine(SearxTestCase):
Mis en surbrillance
</A>
</p>
</
div
>
</
li
>
"""
response
=
mock
.
Mock
(
text
=
html
.
encode
(
'utf-8'
))
results
=
startpage
.
response
(
response
)
...
...
@@ -67,14 +67,14 @@ class TestStartpageEngine(SearxTestCase):
self
.
assertEqual
(
results
[
0
][
'content'
],
'This should be the content.'
)
html
=
"""
<
div
class=
'
result
'
s
tyle=' *width : auto; *margin-right : 10%;'
>
<
li
class=
"search-
result s
earch-item"
>
<h3>
<a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' >
This should be the title
</a>
<span id='title_stars_2' name='title_stars_2'> </span>
</h3>
<p class=
'desc clk'
>
<p class=
"search-item__body"
>
This should be the content.
</p>
<p>
...
...
@@ -92,20 +92,20 @@ class TestStartpageEngine(SearxTestCase):
Mis en surbrillance
</A>
</p>
</
div
>
<
div
class=
'
result
'
s
tyle=' *width : auto; *margin-right : 10%;'
>
</
li
>
<
li
class=
"search-
result s
earch-item"
>
<h3>
<span id='title_stars_2' name='title_stars_2'> </span>
</h3>
<p class=
'desc clk'
>
<p class=
"search-item__body"
>
This should be the content.
</p>
<p>
<span class='url'>www.speed<b>test</b>.net/fr/
</span>
</p>
</
div
>
<
div
class=
'
result
'
s
tyle=' *width : auto; *margin-right : 10%;'
>
</
li
>
<
li
class=
"search-
result s
earch-item"
>
<h3>
<a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
This should be the title
...
...
@@ -127,7 +127,7 @@ class TestStartpageEngine(SearxTestCase):
Mis en surbrillance
</A>
</p>
</
div
>
</
li
>
"""
response
=
mock
.
Mock
(
text
=
html
.
encode
(
'utf-8'
))
results
=
startpage
.
response
(
response
)
...
...
utils/fetch_languages.py
View file @
1ea56576
...
...
@@ -27,12 +27,14 @@ def fetch_supported_languages():
if
hasattr
(
engines
[
engine_name
],
'fetch_supported_languages'
):
try
:
engines_languages
[
engine_name
]
=
engines
[
engine_name
].
fetch_supported_languages
()
if
type
(
engines_languages
[
engine_name
])
==
list
:
engines_languages
[
engine_name
]
=
sorted
(
engines_languages
[
engine_name
])
except
Exception
as
e
:
print
(
e
)
# write json file
with
io
.
open
(
engines_languages_file
,
"w"
,
encoding
=
"utf-8"
)
as
f
:
dump
(
engines_languages
,
f
,
ensure_ascii
=
False
)
dump
(
engines_languages
,
f
,
ensure_ascii
=
False
,
indent
=
4
,
separators
=
(
','
,
': '
)
)
return
engines_languages
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment