Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
my-spot
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
e
cloud
my-spot
Commits
ca09d910
Unverified
Commit
ca09d910
authored
Apr 09, 2019
by
Alexandre Flament
Committed by
GitHub
Apr 09, 2019
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1544 from MarcAbonce/youtube_fix
[fix] get YouTube results
parents
3ee804ec
e868650d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
142 deletions
+90
-142
searx/engines/youtube_noapi.py
searx/engines/youtube_noapi.py
+34
-36
tests/unit/engines/test_youtube_noapi.py
tests/unit/engines/test_youtube_noapi.py
+56
-106
No files found.
searx/engines/youtube_noapi.py
View file @
ca09d910
...
...
@@ -8,7 +8,8 @@
# @stable no
# @parse url, title, content, publishedDate, thumbnail, embedded
from
lxml
import
html
from
functools
import
reduce
from
json
import
loads
from
searx.engines.xpath
import
extract_text
from
searx.utils
import
list_get
from
searx.url_utils
import
quote_plus
...
...
@@ -34,20 +35,6 @@ embedded_url = '<iframe width="540" height="304" ' +\
base_youtube_url
=
'https://www.youtube.com/watch?v='
# specific xpath variables
results_xpath
=
"//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]"
url_xpath
=
'.//h3/a/@href'
title_xpath
=
'.//div[@class="yt-lockup-content"]/h3/a'
content_xpath
=
'.//div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]'
# returns extract_text on the first result selected by the xpath or None
def
extract_text_from_dom
(
result
,
xpath
):
r
=
result
.
xpath
(
xpath
)
if
len
(
r
)
>
0
:
return
extract_text
(
r
[
0
])
return
None
# do search-request
def
request
(
query
,
params
):
...
...
@@ -63,27 +50,38 @@ def request(query, params):
def
response
(
resp
):
results
=
[]
dom
=
html
.
fromstring
(
resp
.
text
)
# parse results
for
result
in
dom
.
xpath
(
results_xpath
):
videoid
=
list_get
(
result
.
xpath
(
'@data-context-item-id'
),
0
)
if
videoid
is
not
None
:
url
=
base_youtube_url
+
videoid
thumbnail
=
'https://i.ytimg.com/vi/'
+
videoid
+
'/hqdefault.jpg'
title
=
extract_text_from_dom
(
result
,
title_xpath
)
or
videoid
content
=
extract_text_from_dom
(
result
,
content_xpath
)
embedded
=
embedded_url
.
format
(
videoid
=
videoid
)
# append result
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'content'
:
content
,
'template'
:
'videos.html'
,
'embedded'
:
embedded
,
'thumbnail'
:
thumbnail
})
results_data
=
resp
.
text
[
resp
.
text
.
find
(
'ytInitialData'
):]
results_data
=
results_data
[
results_data
.
find
(
'{'
):
results_data
.
find
(
';
\n
'
)]
results_json
=
loads
(
results_data
)
if
results_data
else
{}
sections
=
results_json
.
get
(
'contents'
,
{})
\
.
get
(
'twoColumnSearchResultsRenderer'
,
{})
\
.
get
(
'primaryContents'
,
{})
\
.
get
(
'sectionListRenderer'
,
{})
\
.
get
(
'contents'
,
[])
for
section
in
sections
:
for
video_container
in
section
.
get
(
'itemSectionRenderer'
,
{}).
get
(
'contents'
,
[]):
video
=
video_container
.
get
(
'videoRenderer'
,
{})
videoid
=
video
.
get
(
'videoId'
)
if
videoid
is
not
None
:
url
=
base_youtube_url
+
videoid
thumbnail
=
'https://i.ytimg.com/vi/'
+
videoid
+
'/hqdefault.jpg'
title
=
video
.
get
(
'title'
,
{}).
get
(
'simpleText'
,
videoid
)
description_snippet
=
video
.
get
(
'descriptionSnippet'
,
{})
if
'runs'
in
description_snippet
:
content
=
reduce
(
lambda
a
,
b
:
a
+
b
.
get
(
'text'
,
''
),
description_snippet
.
get
(
'runs'
),
''
)
else
:
content
=
description_snippet
.
get
(
'simpleText'
,
''
)
embedded
=
embedded_url
.
format
(
videoid
=
videoid
)
# append result
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'content'
:
content
,
'template'
:
'videos.html'
,
'embedded'
:
embedded
,
'thumbnail'
:
thumbnail
})
# return results
return
results
tests/unit/engines/test_youtube_noapi.py
View file @
ca09d910
...
...
@@ -46,121 +46,71 @@ class TestYoutubeNoAPIEngine(SearxTestCase):
self
.
assertEqual
(
youtube_noapi
.
response
(
response
),
[])
html
=
"""
<ol id="item-section-063864" class="item-section">
<li>
<div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
data-context-item-id="DIVZCPfAOeM"
data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
<div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
<a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
<div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
<span class="thumb-menu dark-overflow-action-menu video-actions">
</span>
</div>
<div class="yt-lockup-content">
<h3 class="yt-lockup-title">
<a href="/watch?v=DIVZCPfAOeM"
class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"
title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE"
aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr">
Title
</a>
<span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
</h3>
<div class="yt-lockup-byline">de
<a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
<ul class="yt-lockup-meta-info">
<li>il y a 20 heures</li>
<li>8 424 vues</li>
</ul>
</div>
<div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr">
Description
</div>
<div class="yt-lockup-badges">
<ul class="yt-badge-list ">
<li class="yt-badge-item" >
<span class="yt-badge">Nouveauté</span>
</li>
<li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
</ul>
</div>
<div class="yt-lockup-action-menu yt-uix-menu-container">
<div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
</div>
</div>
</div>
</div>
</div>
</li>
</ol>
<div></div>
<script>
window["ytInitialData"] = {
"contents": {
"twoColumnSearchResultsRenderer": {
"primaryContents": {
"sectionListRenderer": {
"contents": [
{
"itemSectionRenderer": {
"contents": [
{
"videoRenderer": {
"videoId": "DIVZCPfAOeM",
"title": {
"simpleText": "Title"
},
"descriptionSnippet": {
"runs": [
{
"text": "Des"
},
{
"text": "cription"
}
]
}
}
},
{
"videoRenderer": {
"videoId": "9C_HReR_McQ",
"title": {
"simpleText": "Title"
},
"descriptionSnippet": {
"simpleText": "Description"
}
}
}
]
}
}
]
}
}
}
}
};
</script>
"""
response
=
mock
.
Mock
(
text
=
html
)
results
=
youtube_noapi
.
response
(
response
)
self
.
assertEqual
(
type
(
results
),
list
)
self
.
assertEqual
(
len
(
results
),
1
)
self
.
assertEqual
(
len
(
results
),
2
)
self
.
assertEqual
(
results
[
0
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
0
][
'url'
],
'https://www.youtube.com/watch?v=DIVZCPfAOeM'
)
self
.
assertEqual
(
results
[
0
][
'content'
],
'Description'
)
self
.
assertEqual
(
results
[
0
][
'thumbnail'
],
'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg'
)
self
.
assertTrue
(
'DIVZCPfAOeM'
in
results
[
0
][
'embedded'
])
html
=
"""
<ol id="item-section-063864" class="item-section">
<li>
<div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
data-context-item-id="DIVZCPfAOeM"
data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
<div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
<a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
<div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
<span class="thumb-menu dark-overflow-action-menu video-actions">
</span>
</div>
<div class="yt-lockup-content">
<h3 class="yt-lockup-title">
<span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
</h3>
<div class="yt-lockup-byline">de
<a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
<ul class="yt-lockup-meta-info">
<li>il y a 20 heures</li>
<li>8 424 vues</li>
</ul>
</div>
<div class="yt-lockup-badges">
<ul class="yt-badge-list ">
<li class="yt-badge-item" >
<span class="yt-badge">Nouveauté</span>
</li>
<li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
</ul>
</div>
<div class="yt-lockup-action-menu yt-uix-menu-container">
<div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
</div>
</div>
</div>
</div>
</div>
</li>
</ol>
"""
response
=
mock
.
Mock
(
text
=
html
)
results
=
youtube_noapi
.
response
(
response
)
self
.
assertEqual
(
type
(
results
),
list
)
self
.
assertEqual
(
len
(
results
),
1
)
self
.
assertEqual
(
results
[
1
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
1
][
'url'
],
'https://www.youtube.com/watch?v=9C_HReR_McQ'
)
self
.
assertEqual
(
results
[
1
][
'content'
],
'Description'
)
self
.
assertEqual
(
results
[
1
][
'thumbnail'
],
'https://i.ytimg.com/vi/9C_HReR_McQ/hqdefault.jpg'
)
self
.
assertTrue
(
'9C_HReR_McQ'
in
results
[
1
][
'embedded'
])
html
=
"""
<ol id="item-section-063864" class="item-section">
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment