Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a45408e8 authored by Israel Yago Pereira, committed by Nivesh Krishna
Browse files

Update ina engine xpath values

parent b7f8aadc
Loading
Loading
Loading
Loading
+11 −33
Original line number | Diff line number | Diff line
@@ -3,11 +3,9 @@
 INA (Videos)
"""

from json import loads
from html import unescape
from urllib.parse import urlencode
from lxml import html
from dateutil import parser
from searx.utils import extract_text

# about
@@ -23,25 +21,23 @@ about = {
# engine dependent config
categories = ['videos']
paging = True
page_size = 48
page_size = 12

# search-url
base_url = 'https://www.ina.fr'
search_url = base_url + '/layout/set/ajax/recherche/result?autopromote=&hf={ps}&b={start}&type=Video&r=&{query}'
search_url = base_url + '/ajax/recherche?{query}&espace=1&sort=pertinence&order=desc&offset={start}&modified=size'

# specific xpath variables
results_xpath = '//div[contains(@class,"search-results--list")]//div[@class="media-body"]'
results_xpath = '//div[@id="searchHits"]/div'
url_xpath = './/a/@href'
title_xpath = './/h3[@class="h3--title media-heading"]'
thumbnail_xpath = './/img/@src'
publishedDate_xpath = './/span[@class="broadcast"]'
content_xpath = './/p[@class="media-body__summary"]'
title_xpath = './/div[contains(@class,"title-bloc-small")]'
thumbnail_xpath = './/img/@data-src'
publishedDate_xpath = '//div[@id="searchHits"]//div[contains(@class,"dateAgenda")]'


# do search-request
def request(query, params):
    params['url'] = search_url.format(ps=page_size,
                                      start=params['pageno'] * page_size,
    params['url'] = search_url.format(start=params['pageno'] * page_size,
                                      query=urlencode({'q': query}))

    return params
@@ -51,34 +47,16 @@ def request(query, params):
def response(resp):
    results = []

    # we get html in a JSON container...
    response = loads(resp.text)
    dom = html.fromstring(response)

    dom = html.fromstring(resp.text)
    # parse results
    for result in dom.xpath(results_xpath):
        videoid = result.xpath(url_xpath)[0]
        url = base_url + videoid
        url_relative = result.xpath(url_xpath)[0]
        url = base_url + url_relative
        title = unescape(extract_text(result.xpath(title_xpath)))
        try:
            thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
        except:
            thumbnail = ''
        if thumbnail and thumbnail[0] == '/':
            thumbnail = base_url + thumbnail
        d = extract_text(result.xpath(publishedDate_xpath)[0])
        d = d.split('/')
        # force ISO date to avoid wrong parsing
        d = "%s-%s-%s" % (d[2], d[1], d[0])
        publishedDate = parser.parse(d)
        content = extract_text(result.xpath(content_xpath))

        # append result
        thumbnail = extract_text(result.xpath(thumbnail_xpath))
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'thumbnail': thumbnail})

    # return results