Loading README.md +3 −5 Original line number Diff line number Diff line Loading @@ -57,12 +57,10 @@ Then go to http://localhost:8088. ### For developer You can directly run spot, with a python command inside a docker container which contains all dependencies. After running the docker-compose stack you can stop the default spot service and run the debug one with Flask server. You can only run spot, with a python command inside a docker container which contains all dependencies. ``` docker-compose rm -sf spot docker run -it --rm -v $(pwd):/ws -w /ws --hostname spot --network=my-spot_default --env-file .env registry.gitlab.e.foundation:5000/e/cloud/my-spot/env bash docker run -it --rm -v $(pwd):/ws -w /ws -e SEARX_UI_DEFAULT_THEME=etheme -p 8088:80 registry.gitlab.e.foundation:5000/e/cloud/my-spot/env bash PYTHONPATH=$(pwd) SEARX_DEBUG=1 python -X dev searx/webapp.py ``` searx/engines/google.py +83 −27 Original line number Diff line number Diff line Loading @@ -134,42 +134,113 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a' def get_lang_info(params, lang_list, custom_aliases, supported_any_language): ret_val = {} """Composing various language properties for the google engines. This function is called by the various google engines (google itself, google-images, -news, -scholar, -videos). :param dict param: request parameters of the engine :param list lang_list: list of supported languages of the engine :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>` :param dict lang_list: custom aliases for non standard language codes (used when calling :py:func:`searx.utils.match_language) :param bool supported_any_language: When a language is not specified, the language interpretation is left up to Google to decide how the search results should be delivered. This argument is ``True`` for the google engine and ``False`` for the other engines (google-images, -news, -scholar, -videos). :rtype: dict :returns: Py-Dictionary with the key/value pairs: language: Return value from :py:func:`searx.utils.match_language country: The country code (e.g. US, AT, CA, FR, DE ..) subdomain: Google subdomain :py:obj:`google_domains` that fits to the country code. params: Py-Dictionary with additional request arguments (can be passed to :py:func:`urllib.parse.urlencode`). headers: Py-Dictionary with additional HTTP headers (can be passed to request's headers) """ ret_val = { 'language' : None, 'country' : None, 'subdomain' : None, 'params' : {}, 'headers' : {}, } # language ... _lang = params['language'] _any_language = _lang.lower() == 'all' if _any_language: _lang = 'en-US' language = match_language(_lang, lang_list, custom_aliases) ret_val['language'] = language # the requested language from params (en, en-US, de, de-AT, fr, fr-CA, ...) _l = _lang.split('-') # country ... # the country code (US, AT, CA) _l = _lang.split('-') if len(_l) == 2: country = _l[1] else: country = _l[0].upper() if country == 'EN': country = 'US' ret_val['country'] = country # the combination (en-US, en-EN, de-DE, de-AU, fr-FR, fr-FR) lang_country = '%s-%s' % (language, country) # subdomain ... # subdomain ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') ret_val['params'] = {} ret_val['headers'] = {} # params & headers lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..) # hl parameter: # https://developers.google.com/custom-search/docs/xml_results#hlsp The # Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages ret_val['params']['hl'] = lang_list.get(lang_country, language) # lr parameter: # The lr (language restrict) parameter restricts search results to # documents written in a particular language. # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections if _any_language and supported_any_language: # based on whoogle # interpretation is left up to Google (based on whoogle) # # - add parameter ``source=lnt`` # - don't use parameter ``lr`` # - don't add a ``Accept-Language`` HTTP header. ret_val['params']['source'] = 'lnt' else: # restricts search results to documents written in a particular # language. ret_val['params']['lr'] = "lang_" + lang_country if lang_country in lang_list else language # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 ret_val['headers']['Accept-Language'] = ','.join([ lang_country, Loading @@ -178,18 +249,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): '*;q=0.5', ]) # lr parameter: # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections ret_val['params']['lr'] = "lang_" + lang_country if lang_country in lang_list else language ret_val['params']['hl'] = lang_country if lang_country in lang_list else language # hl parameter: # https://developers.google.com/custom-search/docs/xml_results#hlsp The # Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages return ret_val def detect_google_sorry(resp): Loading Loading @@ -228,11 +287,8 @@ def request(query, params): query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) if params['safesearch']: query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) logger.debug("query_url --> %s", query_url) params['url'] = query_url logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language')) params['headers'].update(lang_info['headers']) if use_mobile_ui: params['headers']['Accept'] = '*/*' Loading searx/webapp.py +1 −0 Original line number Diff line number Diff line Loading @@ -1154,6 +1154,7 @@ def run(): app.run( debug=searx_debug, use_debugger=searx_debug, load_dotenv=False, port=settings['server']['port'], host=settings['server']['bind_address'], threaded=True, Loading Loading
README.md +3 −5 Original line number Diff line number Diff line Loading @@ -57,12 +57,10 @@ Then go to http://localhost:8088. ### For developer You can directly run spot, with a python command inside a docker container which contains all dependencies. After running the docker-compose stack you can stop the default spot service and run the debug one with Flask server. You can only run spot, with a python command inside a docker container which contains all dependencies. ``` docker-compose rm -sf spot docker run -it --rm -v $(pwd):/ws -w /ws --hostname spot --network=my-spot_default --env-file .env registry.gitlab.e.foundation:5000/e/cloud/my-spot/env bash docker run -it --rm -v $(pwd):/ws -w /ws -e SEARX_UI_DEFAULT_THEME=etheme -p 8088:80 registry.gitlab.e.foundation:5000/e/cloud/my-spot/env bash PYTHONPATH=$(pwd) SEARX_DEBUG=1 python -X dev searx/webapp.py ```
searx/engines/google.py +83 −27 Original line number Diff line number Diff line Loading @@ -134,42 +134,113 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a' def get_lang_info(params, lang_list, custom_aliases, supported_any_language): ret_val = {} """Composing various language properties for the google engines. This function is called by the various google engines (google itself, google-images, -news, -scholar, -videos). :param dict param: request parameters of the engine :param list lang_list: list of supported languages of the engine :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>` :param dict lang_list: custom aliases for non standard language codes (used when calling :py:func:`searx.utils.match_language) :param bool supported_any_language: When a language is not specified, the language interpretation is left up to Google to decide how the search results should be delivered. This argument is ``True`` for the google engine and ``False`` for the other engines (google-images, -news, -scholar, -videos). :rtype: dict :returns: Py-Dictionary with the key/value pairs: language: Return value from :py:func:`searx.utils.match_language country: The country code (e.g. US, AT, CA, FR, DE ..) subdomain: Google subdomain :py:obj:`google_domains` that fits to the country code. params: Py-Dictionary with additional request arguments (can be passed to :py:func:`urllib.parse.urlencode`). headers: Py-Dictionary with additional HTTP headers (can be passed to request's headers) """ ret_val = { 'language' : None, 'country' : None, 'subdomain' : None, 'params' : {}, 'headers' : {}, } # language ... _lang = params['language'] _any_language = _lang.lower() == 'all' if _any_language: _lang = 'en-US' language = match_language(_lang, lang_list, custom_aliases) ret_val['language'] = language # the requested language from params (en, en-US, de, de-AT, fr, fr-CA, ...) _l = _lang.split('-') # country ... # the country code (US, AT, CA) _l = _lang.split('-') if len(_l) == 2: country = _l[1] else: country = _l[0].upper() if country == 'EN': country = 'US' ret_val['country'] = country # the combination (en-US, en-EN, de-DE, de-AU, fr-FR, fr-FR) lang_country = '%s-%s' % (language, country) # subdomain ... # subdomain ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') ret_val['params'] = {} ret_val['headers'] = {} # params & headers lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..) # hl parameter: # https://developers.google.com/custom-search/docs/xml_results#hlsp The # Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages ret_val['params']['hl'] = lang_list.get(lang_country, language) # lr parameter: # The lr (language restrict) parameter restricts search results to # documents written in a particular language. # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections if _any_language and supported_any_language: # based on whoogle # interpretation is left up to Google (based on whoogle) # # - add parameter ``source=lnt`` # - don't use parameter ``lr`` # - don't add a ``Accept-Language`` HTTP header. ret_val['params']['source'] = 'lnt' else: # restricts search results to documents written in a particular # language. ret_val['params']['lr'] = "lang_" + lang_country if lang_country in lang_list else language # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 ret_val['headers']['Accept-Language'] = ','.join([ lang_country, Loading @@ -178,18 +249,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): '*;q=0.5', ]) # lr parameter: # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections ret_val['params']['lr'] = "lang_" + lang_country if lang_country in lang_list else language ret_val['params']['hl'] = lang_country if lang_country in lang_list else language # hl parameter: # https://developers.google.com/custom-search/docs/xml_results#hlsp The # Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages return ret_val def detect_google_sorry(resp): Loading Loading @@ -228,11 +287,8 @@ def request(query, params): query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) if params['safesearch']: query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) logger.debug("query_url --> %s", query_url) params['url'] = query_url logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language')) params['headers'].update(lang_info['headers']) if use_mobile_ui: params['headers']['Accept'] = '*/*' Loading
searx/webapp.py +1 −0 Original line number Diff line number Diff line Loading @@ -1154,6 +1154,7 @@ def run(): app.run( debug=searx_debug, use_debugger=searx_debug, load_dotenv=False, port=settings['server']['port'], host=settings['server']['bind_address'], threaded=True, Loading