Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
e
infra
spot
Commits
ebdfdcde
Commit
ebdfdcde
authored
Jan 25, 2022
by
Nivesh Krishna
Browse files
Merge branch '79-fix-ina-engine' into 'master'
Update ina engine xpath values Closes #79 See merge request e/cloud/my-spot!98
parents
8ebc5827
a45408e8
Changes
1
Hide whitespace changes
Inline
Side-by-side
searx/engines/ina.py
View file @
ebdfdcde
...
@@ -3,11 +3,9 @@
...
@@ -3,11 +3,9 @@
INA (Videos)
INA (Videos)
"""
"""
from
json
import
loads
from
html
import
unescape
from
html
import
unescape
from
urllib.parse
import
urlencode
from
urllib.parse
import
urlencode
from
lxml
import
html
from
lxml
import
html
from
dateutil
import
parser
from
searx.utils
import
extract_text
from
searx.utils
import
extract_text
# about
# about
...
@@ -23,25 +21,23 @@ about = {
...
@@ -23,25 +21,23 @@ about = {
# engine dependent config
# engine dependent config
categories
=
[
'videos'
]
categories
=
[
'videos'
]
paging
=
True
paging
=
True
page_size
=
48
page_size
=
12
# search-url
# search-url
base_url
=
'https://www.ina.fr'
base_url
=
'https://www.ina.fr'
search_url
=
base_url
+
'/
layout/set/
ajax/recherche
/result?autopromote=&hf={ps}&b={start}&type=Video&r=&{query}
'
search_url
=
base_url
+
'/ajax/recherche
?{query}&espace=1&sort=pertinence&order=desc&offset={start}&modified=size
'
# specific xpath variables
# specific xpath variables
results_xpath
=
'//div[
contains(@class,"search-results--list")]//div[@class="media-body"]
'
results_xpath
=
'//div[
@id="searchHits"]/div
'
url_xpath
=
'.//a/@href'
url_xpath
=
'.//a/@href'
title_xpath
=
'.//h3[@class="h3--title media-heading"]'
title_xpath
=
'.//div[contains(@class,"title-bloc-small")]'
thumbnail_xpath
=
'.//img/@src'
thumbnail_xpath
=
'.//img/@data-src'
publishedDate_xpath
=
'.//span[@class="broadcast"]'
publishedDate_xpath
=
'//div[@id="searchHits"]//div[contains(@class,"dateAgenda")]'
content_xpath
=
'.//p[@class="media-body__summary"]'
# do search-request
# do search-request
def
request
(
query
,
params
):
def
request
(
query
,
params
):
params
[
'url'
]
=
search_url
.
format
(
ps
=
page_size
,
params
[
'url'
]
=
search_url
.
format
(
start
=
params
[
'pageno'
]
*
page_size
,
start
=
params
[
'pageno'
]
*
page_size
,
query
=
urlencode
({
'q'
:
query
}))
query
=
urlencode
({
'q'
:
query
}))
return
params
return
params
...
@@ -51,34 +47,16 @@ def request(query, params):
...
@@ -51,34 +47,16 @@ def request(query, params):
def
response
(
resp
):
def
response
(
resp
):
results
=
[]
results
=
[]
# we get html in a JSON container...
dom
=
html
.
fromstring
(
resp
.
text
)
response
=
loads
(
resp
.
text
)
dom
=
html
.
fromstring
(
response
)
# parse results
# parse results
for
result
in
dom
.
xpath
(
results_xpath
):
for
result
in
dom
.
xpath
(
results_xpath
):
videoid
=
result
.
xpath
(
url_xpath
)[
0
]
url_relative
=
result
.
xpath
(
url_xpath
)[
0
]
url
=
base_url
+
videoid
url
=
base_url
+
url_relative
title
=
unescape
(
extract_text
(
result
.
xpath
(
title_xpath
)))
title
=
unescape
(
extract_text
(
result
.
xpath
(
title_xpath
)))
try
:
thumbnail
=
extract_text
(
result
.
xpath
(
thumbnail_xpath
))
thumbnail
=
extract_text
(
result
.
xpath
(
thumbnail_xpath
)[
0
])
except
:
thumbnail
=
''
if
thumbnail
and
thumbnail
[
0
]
==
'/'
:
thumbnail
=
base_url
+
thumbnail
d
=
extract_text
(
result
.
xpath
(
publishedDate_xpath
)[
0
])
d
=
d
.
split
(
'/'
)
# force ISO date to avoid wrong parsing
d
=
"%s-%s-%s"
%
(
d
[
2
],
d
[
1
],
d
[
0
])
publishedDate
=
parser
.
parse
(
d
)
content
=
extract_text
(
result
.
xpath
(
content_xpath
))
# append result
results
.
append
({
'url'
:
url
,
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
'content'
:
content
,
'template'
:
'videos.html'
,
'template'
:
'videos.html'
,
'publishedDate'
:
publishedDate
,
'thumbnail'
:
thumbnail
})
'thumbnail'
:
thumbnail
})
# return results
# return results
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment