Loading searx/__init__.py +2 −1 Original line number Diff line number Diff line Loading @@ -28,7 +28,8 @@ except: searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) # if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH # if possible set path to settings using the # enviroment variable SEARX_SETTINGS_PATH if 'SEARX_SETTINGS_PATH' in environ: settings_path = environ['SEARX_SETTINGS_PATH'] # otherwise using default path Loading searx/engines/__init__.py +6 −5 Original line number Diff line number Diff line Loading @@ -41,7 +41,7 @@ def load_module(filename): module.name = modname return module if not 'engines' in settings or not settings['engines']: if 'engines' not in settings or not settings['engines']: print '[E] Error no engines found. Edit your settings.yml' exit(2) Loading Loading @@ -161,7 +161,8 @@ def get_engines_stats(): for engine in scores_per_result: if max_score_per_result: engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) else: engine['percentage'] = 0 Loading searx/query.py +23 −20 Original line number Diff line number Diff line Loading @@ -39,7 +39,8 @@ class Query(object): self.engines = [] self.languages = [] # parse query, if tags are set, which change the serch engine or search-language # parse query, if tags are set, which # change the serch engine or search-language def parse_query(self): self.query_parts = [] Loading @@ -66,11 +67,13 @@ class Query(object): if query_part[0] == ':': lang = query_part[1:].lower() # check if any language-code is equal with declared language-codes # check if any language-code is equal with # declared language-codes for lc in language_codes: lang_id, lang_name, country = map(str.lower, lc) # if correct language-code is found, set it as new search-language # if correct language-code is found # set it as new search-language if lang == lang_id\ or lang_id.startswith(lang)\ or lang == lang_name\ Loading @@ -92,19 +95,20 @@ class Query(object): # check if prefix is equal with engine name elif prefix in engines\ and not prefix in self.blocked_engines: and prefix not in self.blocked_engines: parse_next = True self.engines.append({'category': 'none', 'name': prefix}) # check if prefix is equal with categorie name elif prefix in categories: # using all engines for that search, which are declared under that categorie name # using all engines for that search, which # are declared under that categorie name parse_next = True self.engines.extend({'category': prefix, 'name': engine.name} for engine in categories[prefix] if not engine in self.blocked_engines) if engine not in self.blocked_engines) # append query part to query_part list self.query_parts.append(query_part) Loading @@ -124,4 +128,3 @@ class Query(object): def getFullQuery(self): # get full querry including whitespaces return string.join(self.query_parts, '') searx/search.py +67 −37 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ from datetime import datetime from operator import itemgetter from urlparse import urlparse, unquote from searx.engines import ( categories, engines, engine_shortcuts categories, engines ) from searx.languages import language_codes from searx.utils import gen_useragent Loading @@ -39,7 +39,13 @@ def default_request_params(): # create a callback wrapper for the search engine results def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params): def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): Loading Loading @@ -126,7 +132,8 @@ def score_results(results): # strip multiple spaces and cariage returns from content if 'content' in res: res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) # get weight of this engine if possible if hasattr(engines[res['engine']], 'weight'): Loading @@ -139,8 +146,12 @@ def score_results(results): duplicated = False for new_res in results: # remove / from the end of the url if required p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa p1 = res['parsed_url'].path[:-1]\ if res['parsed_url'].path.endswith('/')\ else res['parsed_url'].path p2 = new_res['parsed_url'].path[:-1]\ if new_res['parsed_url'].path.endswith('/')\ else new_res['parsed_url'].path # check if that result is a duplicate if res['host'] == new_res['host'] and\ Loading @@ -153,7 +164,8 @@ def score_results(results): # merge duplicates together if duplicated: # using content with more text if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')): if content_result_len(res.get('content', '')) >\ content_result_len(duplicated.get('content', '')): duplicated['content'] = res['content'] # increase result-score Loading Loading @@ -182,17 +194,25 @@ def score_results(results): for i, res in enumerate(results): # FIXME : handle more than one category per engine category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template'] current = None if category not in categoryPositions else categoryPositions[category] # group with previous results using the same category if the group can accept more result and is not too far from the current position if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20): # group with the previous results using the same category with this one category = engines[res['engine']].categories[0] + ':' + ''\ if 'template' not in res\ else res['template'] current = None if category not in categoryPositions\ else categoryPositions[category] # group with previous results using the same category # if the group can accept more result and is not too far # from the current position if current is not None and (current['count'] > 0)\ and (len(gresults) - current['index'] < 20): # group with the previous results using # the same category with this one index = current['index'] gresults.insert(index, res) # update every index after the current one (including the current one) # update every index after the current one # (including the current one) for k in categoryPositions: v = categoryPositions[k]['index'] if v >= index: Loading @@ -215,7 +235,7 @@ def score_results(results): def merge_two_infoboxes(infobox1, infobox2): if 'urls' in infobox2: urls1 = infobox1.get('urls', None) if urls1 == None: if urls1 is None: urls1 = [] infobox1.set('urls', urls1) Loading @@ -229,7 +249,7 @@ def merge_two_infoboxes(infobox1, infobox2): if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) if attributes1 == None: if attributes1 is None: attributes1 = [] infobox1.set('attributes', attributes1) Loading @@ -244,7 +264,7 @@ def merge_two_infoboxes(infobox1, infobox2): if 'content' in infobox2: content1 = infobox1.get('content', None) content2 = infobox2.get('content', '') if content1 != None: if content1 is not None: if content_result_len(content2) > content_result_len(content1): infobox1['content'] = content2 else: Loading @@ -257,9 +277,9 @@ def merge_infoboxes(infoboxes): for infobox in infoboxes: add_infobox = True infobox_id = infobox.get('id', None) if infobox_id != None: if infobox_id is not None: existingIndex = infoboxes_id.get(infobox_id, None) if existingIndex != None: if existingIndex is not None: merge_two_infoboxes(results[existingIndex], infobox) add_infobox = False Loading Loading @@ -318,7 +338,8 @@ class Search(object): self.pageno = int(pageno_param) # parse query, if tags are set, which change the serch engine or search-language # parse query, if tags are set, which change # the serch engine or search-language query_obj = Query(self.request_data['q'], self.blocked_engines) query_obj.parse_query() Loading @@ -334,25 +355,29 @@ class Search(object): self.categories = [] # if engines are calculated from query, set categories by using that informations # if engines are calculated from query, # set categories by using that informations if self.engines: self.categories = list(set(engine['category'] for engine in self.engines)) # otherwise, using defined categories to calculate which engines should be used # otherwise, using defined categories to # calculate which engines should be used else: # set used categories for pd_name, pd in self.request_data.items(): if pd_name.startswith('category_'): category = pd_name[9:] # if category is not found in list, skip if not category in categories: if category not in categories: continue # add category to list self.categories.append(category) # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie) # if no category is specified for this search, # using user-defined default-configuration which # (is stored in cookie) if not self.categories: cookie_categories = request.cookies.get('categories', '') cookie_categories = cookie_categories.split(',') Loading @@ -360,16 +385,18 @@ class Search(object): if ccateg in categories: self.categories.append(ccateg) # if still no category is specified, using general as default-category # if still no category is specified, using general # as default-category if not self.categories: self.categories = ['general'] # using all engines for that search, which are declared under the specific categories # using all engines for that search, which are # declared under the specific categories for categ in self.categories: self.engines.extend({'category': categ, 'name': x.name} for x in categories[categ] if not x.name in self.blocked_engines) if x.name not in self.blocked_engines) # do search-request def search(self, request): Loading Loading @@ -400,7 +427,8 @@ class Search(object): if self.pageno > 1 and not engine.paging: continue # if search-language is set and engine does not provide language-support, skip # if search-language is set and engine does not # provide language-support, skip if self.lang != 'all' and not engine.language_support: continue Loading @@ -412,7 +440,8 @@ class Search(object): request_params['pageno'] = self.pageno request_params['language'] = self.lang # update request parameters dependent on search-engine (contained in engines folder) # update request parameters dependent on # search-engine (contained in engines folder) request_params = engine.request(self.query.encode('utf-8'), request_params) Loading @@ -431,7 +460,8 @@ class Search(object): request_params ) # create dictionary which contain all informations about the request # create dictionary which contain all # informations about the request request_args = dict( headers=request_params['headers'], hooks=dict(response=callback), Loading searx/utils.py +5 −4 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ def gen_useragent(): def searx_useragent(): return 'searx' def highlight_content(content, query): if not content: Loading Loading
searx/__init__.py +2 −1 Original line number Diff line number Diff line Loading @@ -28,7 +28,8 @@ except: searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) # if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH # if possible set path to settings using the # enviroment variable SEARX_SETTINGS_PATH if 'SEARX_SETTINGS_PATH' in environ: settings_path = environ['SEARX_SETTINGS_PATH'] # otherwise using default path Loading
searx/engines/__init__.py +6 −5 Original line number Diff line number Diff line Loading @@ -41,7 +41,7 @@ def load_module(filename): module.name = modname return module if not 'engines' in settings or not settings['engines']: if 'engines' not in settings or not settings['engines']: print '[E] Error no engines found. Edit your settings.yml' exit(2) Loading Loading @@ -161,7 +161,8 @@ def get_engines_stats(): for engine in scores_per_result: if max_score_per_result: engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) else: engine['percentage'] = 0 Loading
searx/query.py +23 −20 Original line number Diff line number Diff line Loading @@ -39,7 +39,8 @@ class Query(object): self.engines = [] self.languages = [] # parse query, if tags are set, which change the serch engine or search-language # parse query, if tags are set, which # change the serch engine or search-language def parse_query(self): self.query_parts = [] Loading @@ -66,11 +67,13 @@ class Query(object): if query_part[0] == ':': lang = query_part[1:].lower() # check if any language-code is equal with declared language-codes # check if any language-code is equal with # declared language-codes for lc in language_codes: lang_id, lang_name, country = map(str.lower, lc) # if correct language-code is found, set it as new search-language # if correct language-code is found # set it as new search-language if lang == lang_id\ or lang_id.startswith(lang)\ or lang == lang_name\ Loading @@ -92,19 +95,20 @@ class Query(object): # check if prefix is equal with engine name elif prefix in engines\ and not prefix in self.blocked_engines: and prefix not in self.blocked_engines: parse_next = True self.engines.append({'category': 'none', 'name': prefix}) # check if prefix is equal with categorie name elif prefix in categories: # using all engines for that search, which are declared under that categorie name # using all engines for that search, which # are declared under that categorie name parse_next = True self.engines.extend({'category': prefix, 'name': engine.name} for engine in categories[prefix] if not engine in self.blocked_engines) if engine not in self.blocked_engines) # append query part to query_part list self.query_parts.append(query_part) Loading @@ -124,4 +128,3 @@ class Query(object): def getFullQuery(self): # get full querry including whitespaces return string.join(self.query_parts, '')
searx/search.py +67 −37 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ from datetime import datetime from operator import itemgetter from urlparse import urlparse, unquote from searx.engines import ( categories, engines, engine_shortcuts categories, engines ) from searx.languages import language_codes from searx.utils import gen_useragent Loading @@ -39,7 +39,13 @@ def default_request_params(): # create a callback wrapper for the search engine results def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params): def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): Loading Loading @@ -126,7 +132,8 @@ def score_results(results): # strip multiple spaces and cariage returns from content if 'content' in res: res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) # get weight of this engine if possible if hasattr(engines[res['engine']], 'weight'): Loading @@ -139,8 +146,12 @@ def score_results(results): duplicated = False for new_res in results: # remove / from the end of the url if required p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa p1 = res['parsed_url'].path[:-1]\ if res['parsed_url'].path.endswith('/')\ else res['parsed_url'].path p2 = new_res['parsed_url'].path[:-1]\ if new_res['parsed_url'].path.endswith('/')\ else new_res['parsed_url'].path # check if that result is a duplicate if res['host'] == new_res['host'] and\ Loading @@ -153,7 +164,8 @@ def score_results(results): # merge duplicates together if duplicated: # using content with more text if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')): if content_result_len(res.get('content', '')) >\ content_result_len(duplicated.get('content', '')): duplicated['content'] = res['content'] # increase result-score Loading Loading @@ -182,17 +194,25 @@ def score_results(results): for i, res in enumerate(results): # FIXME : handle more than one category per engine category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template'] current = None if category not in categoryPositions else categoryPositions[category] # group with previous results using the same category if the group can accept more result and is not too far from the current position if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20): # group with the previous results using the same category with this one category = engines[res['engine']].categories[0] + ':' + ''\ if 'template' not in res\ else res['template'] current = None if category not in categoryPositions\ else categoryPositions[category] # group with previous results using the same category # if the group can accept more result and is not too far # from the current position if current is not None and (current['count'] > 0)\ and (len(gresults) - current['index'] < 20): # group with the previous results using # the same category with this one index = current['index'] gresults.insert(index, res) # update every index after the current one (including the current one) # update every index after the current one # (including the current one) for k in categoryPositions: v = categoryPositions[k]['index'] if v >= index: Loading @@ -215,7 +235,7 @@ def score_results(results): def merge_two_infoboxes(infobox1, infobox2): if 'urls' in infobox2: urls1 = infobox1.get('urls', None) if urls1 == None: if urls1 is None: urls1 = [] infobox1.set('urls', urls1) Loading @@ -229,7 +249,7 @@ def merge_two_infoboxes(infobox1, infobox2): if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) if attributes1 == None: if attributes1 is None: attributes1 = [] infobox1.set('attributes', attributes1) Loading @@ -244,7 +264,7 @@ def merge_two_infoboxes(infobox1, infobox2): if 'content' in infobox2: content1 = infobox1.get('content', None) content2 = infobox2.get('content', '') if content1 != None: if content1 is not None: if content_result_len(content2) > content_result_len(content1): infobox1['content'] = content2 else: Loading @@ -257,9 +277,9 @@ def merge_infoboxes(infoboxes): for infobox in infoboxes: add_infobox = True infobox_id = infobox.get('id', None) if infobox_id != None: if infobox_id is not None: existingIndex = infoboxes_id.get(infobox_id, None) if existingIndex != None: if existingIndex is not None: merge_two_infoboxes(results[existingIndex], infobox) add_infobox = False Loading Loading @@ -318,7 +338,8 @@ class Search(object): self.pageno = int(pageno_param) # parse query, if tags are set, which change the serch engine or search-language # parse query, if tags are set, which change # the serch engine or search-language query_obj = Query(self.request_data['q'], self.blocked_engines) query_obj.parse_query() Loading @@ -334,25 +355,29 @@ class Search(object): self.categories = [] # if engines are calculated from query, set categories by using that informations # if engines are calculated from query, # set categories by using that informations if self.engines: self.categories = list(set(engine['category'] for engine in self.engines)) # otherwise, using defined categories to calculate which engines should be used # otherwise, using defined categories to # calculate which engines should be used else: # set used categories for pd_name, pd in self.request_data.items(): if pd_name.startswith('category_'): category = pd_name[9:] # if category is not found in list, skip if not category in categories: if category not in categories: continue # add category to list self.categories.append(category) # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie) # if no category is specified for this search, # using user-defined default-configuration which # (is stored in cookie) if not self.categories: cookie_categories = request.cookies.get('categories', '') cookie_categories = cookie_categories.split(',') Loading @@ -360,16 +385,18 @@ class Search(object): if ccateg in categories: self.categories.append(ccateg) # if still no category is specified, using general as default-category # if still no category is specified, using general # as default-category if not self.categories: self.categories = ['general'] # using all engines for that search, which are declared under the specific categories # using all engines for that search, which are # declared under the specific categories for categ in self.categories: self.engines.extend({'category': categ, 'name': x.name} for x in categories[categ] if not x.name in self.blocked_engines) if x.name not in self.blocked_engines) # do search-request def search(self, request): Loading Loading @@ -400,7 +427,8 @@ class Search(object): if self.pageno > 1 and not engine.paging: continue # if search-language is set and engine does not provide language-support, skip # if search-language is set and engine does not # provide language-support, skip if self.lang != 'all' and not engine.language_support: continue Loading @@ -412,7 +440,8 @@ class Search(object): request_params['pageno'] = self.pageno request_params['language'] = self.lang # update request parameters dependent on search-engine (contained in engines folder) # update request parameters dependent on # search-engine (contained in engines folder) request_params = engine.request(self.query.encode('utf-8'), request_params) Loading @@ -431,7 +460,8 @@ class Search(object): request_params ) # create dictionary which contain all informations about the request # create dictionary which contain all # informations about the request request_args = dict( headers=request_params['headers'], hooks=dict(response=callback), Loading
searx/utils.py +5 −4 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ def gen_useragent(): def searx_useragent(): return 'searx' def highlight_content(content, query): if not content: Loading