Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
e
infra
spot
Commits
cf26aba9
Commit
cf26aba9
authored
Jan 04, 2019
by
Venca24
Browse files
[FIX] google videos thumbnails
parent
cee15f03
Changes
1
Hide whitespace changes
Inline
Side-by-side
searx/engines/google_videos.py
View file @
cf26aba9
...
...
@@ -7,15 +7,16 @@
@using-api no
@results HTML
@stable no
@parse url, title, content
@parse url, title, content
, thumbnail
"""
from
datetime
import
date
,
timedelta
from
json
import
loads
from
lxml
import
html
from
searx.engines
import
logger
from
searx.engines.xpath
import
extract_text
from
searx.url_utils
import
urlencode
import
re
# engine dependent config
categories
=
[
'videos'
]
...
...
@@ -73,11 +74,24 @@ def response(resp):
url
=
result
.
xpath
(
'.//div[@class="r"]/a/@href'
)[
0
]
content
=
extract_text
(
result
.
xpath
(
'.//span[@class="st"]'
))
# get thumbnails
script
=
str
(
dom
.
xpath
(
'//script[contains(., "_setImagesSrc")]'
)[
0
].
text
)
id
=
result
.
xpath
(
'.//div[@class="s"]//img/@id'
)[
0
]
thumbnails_data
=
re
.
findall
(
's=
\'
(.*?)(?:
\\\\
[a-z,1-9,
\\\\
]+
\'
|
\'
)\;var ii=\[(?:|[
\'
vidthumb\d+
\'
,]+)
\'
'
+
id
,
script
)
logger
.
debug
(
'google video engine: '
+
id
+
' matched '
+
str
(
len
(
thumbnails_data
))
+
' times (thumbnail)'
)
tmp
=
[]
if
len
(
thumbnails_data
)
!=
0
:
tmp
=
re
.
findall
(
'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)'
,
thumbnails_data
[
0
])
thumbnail
=
''
if
len
(
tmp
)
!=
0
:
thumbnail
=
tmp
[
-
1
]
# append result
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'content'
:
content
,
'thumbnail'
:
''
,
'thumbnail'
:
thumbnail
,
'template'
:
'videos.html'
})
return
results
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment