onesearch engine (8c231bdc) · Commits · e / infra / spot

searx/engines/onesearch.py

0 → 100644

+58 −0

Original line number	Diff line number	Diff line

		"""Onesearch
		"""

		from lxml.html import fromstring

		import re

		from searx.utils import (
		eval_xpath,
		extract_text,
		)

		from urllib.parse import unquote

		# about
		about = {
		"website": 'https://www.onesearch.com/',
		"wikidata_id": None,
		"use_official_api": False,
		"require_api_key": False,
		"results": 'HTML',
		}

		# engine dependent config
		categories = ['general']
		paging = True

		# search-url
		URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'


		def request(query, params):
		starting_from = (params['pageno'] * 10) - 9
		params['url'] = URL % (query, starting_from)
		return params


		# get response from search-request
		def response(resp):

		results = []
		doc = fromstring(resp.text)

		titles_tags = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
		contents = eval_xpath(doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
		onesearch_urls = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')

		for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls):
		print(f"{title_tag.text_content()} ---> {onesearch_url}")
		matches = re.search(r'RU=(.*?)\/', onesearch_url)
		results.append({
		'title': title_tag.text_content(),
		'content': extract_text(content),
		'url': unquote(matches.group(1)),
		})

		return results

+12 −0

Original line number	Diff line number	Diff line
		@@ -1631,6 +1631,18 @@ engines:
		require_api_key: false
		results: HTML

		- name: onesearch
		shortcut: onesearch
		engine: onesearch
		paging: true
		categories: general
		about:
		website: https://www.onesearch.com/
		wikidata_id: None
		use_official_api: false
		require_api_key: false
		results: HTML

		# Doku engine lets you access to any Doku wiki instance:
		# A public one or a private/corporate one.
		# - name : ubuntuwiki