Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 057e9bc1 authored by Markus Heiser
Browse files

[mod] SepiaSearch: re-engineered & upgrade to data_type: traits_v1



- fetch_traits(): SepiaSearch and Peertube use identical languages.
  Replace the module's `supported_languages` dictionary with
  `engine.traits.languages` (data_type: `traits_v1`).
- fixed code to pass pylint
- request(): add argument boostLanguages
- response(): replaced by peertube's video_response() function, which adds
  metadata from the channel name, host & tags

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent 8a8c584f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -17,3 +17,11 @@ Peertube Video

.. automodule:: searx.engines.peertube
  :members:

.. _sepiasearch engine:

SepiaSearch
===========

.. automodule:: searx.engines.sepiasearch
  :members:
+30 −0
Original line number Diff line number Diff line
@@ -3301,6 +3301,36 @@
    },
    "supported_languages": {}
  },
  "sepiasearch": {
    "all_locale": null,
    "custom": {},
    "data_type": "traits_v1",
    "languages": {
      "ca": "ca",
      "cs": "cs",
      "de": "de",
      "el": "el",
      "en": "en",
      "eo": "eo",
      "es": "es",
      "eu": "eu",
      "fi": "fi",
      "fr": "fr",
      "gd": "gd",
      "it": "it",
      "ja": "ja",
      "nl": "nl",
      "pl": "pl",
      "pt": "pt",
      "ru": "ru",
      "sv": "sv",
      "zh": "zh",
      "zh_Hans": "zh",
      "zh_Hant": "zh"
    },
    "regions": {},
    "supported_languages": {}
  },
  "startpage": {
    "all_locale": null,
    "custom": {},
+45 −65
Original line number Diff line number Diff line
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""SepiaSearch (Videos)

SepiaSearch uses the same languages as :py:obj:`Peertube
<searx.engines.peertube>` and the response is identical to the response from the
peertube engines.

"""

from json import loads
from typing import TYPE_CHECKING

from urllib.parse import urlencode
from datetime import datetime

from dateutil import parser, relativedelta

from searx.engines.peertube import fetch_traits  # pylint: disable=unused-import
from searx.engines.peertube import (
    # pylint: disable=unused-import
    video_response,
    safesearch_table,
    time_range_table,
)
from searx.enginelib.traits import EngineTraits

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

# Filled in by the engine loader with the language/region traits fetched by
# :py:obj:`fetch_traits`.
traits: EngineTraits

# about
about = {
    # pylint: disable=line-too-long
    "website": 'https://sepiasearch.org',
    "wikidata_id": None,
    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['videos']
paging = True

time_range_support = True
safesearch = True

supported_languages = [
    # fmt: off
    'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el',
    'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt',
    'sv', 'pl', 'fi', 'ru'
    # fmt: on
]

base_url = 'https://sepiasearch.org/api/v1/search/videos'

# NOTE(review): the two tables below shadow the identically named imports from
# searx.engines.peertube — confirm which set is intended to win.
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}

time_range_table = {
    'day': relativedelta.relativedelta(),
    'week': relativedelta.relativedelta(weeks=-1),
    'month': relativedelta.relativedelta(months=-1),
    'year': relativedelta.relativedelta(years=-1),
}


def minute_to_hm(minute):
    """Format a duration given in minutes as ``H:MM``.

    Returns ``None`` when *minute* is not an ``int`` (e.g. the API omitted
    the duration field).
    """
    if not isinstance(minute, int):
        return None
    hours, minutes = divmod(minute, 60)
    return "%d:%02d" % (hours, minutes)
def request(query, params):
    """Assemble request for the SepiaSearch API"""

    if not query:
        return False

    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
    eng_lang = traits.get_language(params['searxng_locale'], None)

def request(query, params):
    params['url'] = (
        base_url
        + '?'
        base_url.rstrip("/")
        + "/api/v1/search/videos?"
        + urlencode(
            {
                'search': query,
                'start': (params['pageno'] - 1) * 10,
                'count': 10,
                'sort': '-match',
                # -createdAt: sort by date ascending / createdAt: date descending
                'sort': '-match',  # sort by *match descending*
                'nsfw': safesearch_table[params['safesearch']],
            }
        )
    )

    language = params['language'].split('-')[0]
    if language in supported_languages:
        params['url'] += '&languageOneOf[]=' + language
    if eng_lang is not None:
        params['url'] += '&languageOneOf[]=' + eng_lang
        params['url'] += '&boostLanguages[]=' + eng_lang

    if params['time_range'] in time_range_table:
        time = datetime.now().date() + time_range_table[params['time_range']]
        params['url'] += '&startDate=' + time.isoformat()
@@ -73,34 +83,4 @@ def request(query, params):


def response(resp):
    """Build SearXNG video results from a SepiaSearch API response.

    ``resp.text`` is the JSON payload of the ``/api/v1/search/videos``
    endpoint; the video hits are listed under the ``data`` key.

    Fix: a merge artifact left an unreachable ``return video_response(resp)``
    after ``return results``; the dead statement is removed and the reachable
    implementation is kept.
    """
    results = []

    search_results = loads(resp.text)

    # no hits (or an unexpected payload) --> empty result list
    if 'data' not in search_results:
        return []

    for result in search_results['data']:
        results.append(
            {
                'url': result['url'],
                'title': result['name'],
                'content': result['description'],
                'author': result.get('account', {}).get('displayName'),
                'length': minute_to_hm(result.get('duration')),
                'template': 'videos.html',
                'publishedDate': parser.parse(result['publishedAt']),
                'iframe_src': result.get('embedUrl'),
                'thumbnail': result['thumbnailUrl'],
            }
        )

    return results