diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index d9ff3f82122d618e2c04425f6ea0ec84792b881c..3f60475dd122b0e286c1b3ef779df966366dbf60 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,32 +1,20 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Google (Images) +# lint: pylint +"""This is the implementation of the google images engine using the google +internal API used by the Google Go Android app. -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. +This internal API offers results in -.. _admonition:: Content-Security-Policy (CSP) +- JSON (_fmt:json) +- Protobuf (_fmt:pb) +- Protobuf compressed? (_fmt:pc) +- HTML (_fmt:html) +- Protobuf encoded in JSON (_fmt:jspb). - This engine needs to allow images from the `data URLs`_ (prefixed with the - ``data:` scheme).:: - - Header set Content-Security-Policy "img-src 'self' data: ;" - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions -.. 
_data URLs: - https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs """ -from urllib.parse import urlencode, unquote -from lxml import html - -from searx import logger -from searx.utils import ( - eval_xpath, - eval_xpath_list, - eval_xpath_getindex, - extract_text, -) +from urllib.parse import urlencode +from json import loads from searx.engines.google import ( get_lang_info, @@ -35,13 +23,9 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) -# pylint: enable=unused-import +from searx.engines.google import supported_languages_url, _fetch_supported_languages -logger = logger.getChild('google images') +# pylint: enable=unused-import # about about = { @@ -50,83 +34,53 @@ about = { "official_api_documentation": 'https://developers.google.com/custom-search', "use_official_api": False, "require_api_key": False, - "results": 'HTML', + "results": 'JSON', } # engine dependent config -categories = ['images'] -paging = False +categories = ['images', 'web'] +paging = True use_locale_domain = True time_range_support = True safesearch = True +send_accept_language_header = True -filter_mapping = { - 0: 'images', - 1: 'active', - 2: 'active' -} - - -def scrap_out_thumbs(dom): - """Scrap out thumbnail data from