Loading searx/search_database.py +75 −35 Original line number Diff line number Diff line import base64 import json import pymysql class Search(object): def __init__(self, categories, query, pageno, paging, safe_search, language, time_range, engines, results, results_number, results_length, answers, corrections, infoboxes, suggestions, unresponsive_engines): self.categories = categories self.query = query self.pageno = pageno self.paging = paging self.safe_search = safe_search self.language = language self.time_range = time_range self.engines = engines def __init__(self, search_query, results, paging, results_number, answers, corrections, infoboxes, suggestions, unresponsive_engines): self.categories = search_query.categories self.query = search_query.query self.pageno = search_query.pageno self.safe_search = search_query.safesearch self.language = search_query.lang self.time_range = search_query.time_range self.engines = search_query.engines self.results = results self.paging = paging self.results_number = results_number self.results_length = results_length self.answers = answers self.corrections = corrections self.infoboxes = infoboxes Loading @@ -24,31 +24,71 @@ class Search(object): self.unresponsive_engines = unresponsive_engines def read(categories, query, pageno, safe_search, language, time_range, engines, mysql_settings): if len(categories) != 1: def read(q, settings): time_range = q.time_range if time_range == "": time_range = "None" connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'], database=settings['database']) try: with connection.cursor() as cursor: sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \ "UNRESPONSIVE_ENGINES FROM SEARCH_HISTORY WHERE QUERY='%s' AND CATEGORIES='%s' AND PAGENO=%s AND " \ "SAFE_SEARCH=%s AND LANGUAGE='%s' AND TIME_RANGE='%s' AND ENGINES='%s'" cursor.execute( sql % (e(q.query), je(q.categories), q.pageno, q.safesearch, 
e(q.lang), time_range, je(q.engines))) for result in cursor: return Search(q, jd(result[0]), result[1] != 0, result[2], jd(result[3]), jd(result[4]), jd(result[5]), jd(result[6]), jd(result[7])) finally: connection.close() return None category = categories[0].upper().replace(" ", "_") with pymysql.connect(host=mysql_settings['host'], user=mysql_settings['user'], password=mysql_settings['password'], database=mysql_settings['database'], charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor) as connection: def save(q, r, settings): results_number = r.results_number() if results_number < r.results_length(): results_number = 0 results = r.get_ordered_results() for result in results: result['engines'] = list(result['engines']) time_range = q.time_range if time_range == "": time_range = "None" connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'], database=settings['database']) try: with connection.cursor() as cursor: sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, RESULTS_LENGTH, ANSWERS, CORRECTIONS, INFOBOXES, " \ "SUGGESTIONS, UNRESPONSIVE_ENGINES FROM %s WHERE QUERY=%s AND PAGENO=%s AND SAFE_SEARCH=%s" \ " AND LANGUAGE=%s AND TIME_RANGE=%s AND ENGINES=%s" cursor.execute(sql, (category, query, pageno, safe_search, language, time_range, str(engines).replace("'", '"'))) for result in cursor: return Search(categories, query, pageno, result[1] != 0, safe_search, language, time_range, engines, json.loads(result[0]), result[2], result[3], json.loads(result[4]), json.loads(result[5]), json.loads(result[6]), json.loads(result[7]), json.loads(result[8])) return None sql = "INSERT INTO SEARCH_HISTORY(QUERY, CATEGORIES, PAGENO, SAFE_SEARCH, LANGUAGE, TIME_RANGE, ENGINES, " \ "RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \ "UNRESPONSIVE_ENGINES) VALUES('%s', '%s', %s, %s, '%s', '%s', '%s', '%s', %s, %s, '%s', '%s', '%s'," \ " '%s', '%s')" cursor.execute(sql % (e(q.query), je(q.categories), 
q.pageno, q.safesearch, e(q.lang), time_range, je(q.engines), jle(results), r.paging, results_number, jle(r.answers), jle(r.corrections), je(r.infoboxes), jle(r.suggestions), jle(r.unresponsive_engines))) connection.commit() finally: connection.close() return Search(q, results, r.paging, results_number, r.answers, r.corrections, r.infoboxes, r.suggestions, r.unresponsive_engines) def e(uncoded): return base64.b64encode(uncoded) def d(coded): return base64.b64decode(coded) def je(uncoded): return base64.b64encode(json.dumps(uncoded)) def jle(uncoded): return base64.b64encode(json.dumps(list(uncoded))) def save(search_query): path = find_path(search_query) writer = open(path, 'w') def jd(coded): return json.loads(base64.b64decode(coded)) searx/settings.yml +6 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,12 @@ server: image_proxy : False # Proxying image results through searx http_protocol_version : "1.0" # 1.0 and 1.1 are supported mysql: host : "127.0.0.1" user : "searx" password : "password" # change this! 
database : "searx" ui: static_path : "" # Custom static path - leave it blank if you didn't change templates_path : "" # Custom templates path - leave it blank if you didn't change Loading searx/webapp.py +45 −77 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ from searx.engines import ( categories, engines, engine_shortcuts, get_engines_stats, initialize_engines ) from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_resources_directory, highlight_content, get_resources_directory, get_static_files, get_result_templates, get_themes, gen_useragent, dict_subset, prettify_url, match_language ) Loading @@ -74,7 +74,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers from searx.url_utils import urlencode, urlparse, urljoin from searx.utils import new_hmac from searx.results import ResultContainer from searx.search_database import read, save, Search # check if the pyopenssl package is installed. 
def start_search(search_query, user_plugins):
    """Return the cached Search for *search_query*, running it on a cache miss.

    Looks the query up in the MySQL cache first; when nothing is stored yet,
    runs the search (with the user's plugins) and persists the outcome.

    :param search_query: the SearchQuery to execute or fetch from cache
    :param user_plugins: plugins enabled for the current request
    :return: a searx.search_database.Search instance
    """
    search = read(search_query, settings['mysql'])
    # PEP 8: compare against None with identity, not equality.
    if search is None:
        # Cache miss: run the search and cache the result container.
        # result_container = Search(search_query).search()  # without plugins
        result_container = SearchWithPlugins(search_query, user_plugins, request).search()
        return save(search_query, result_container, settings['mysql'])
    return search
if request.form.get("categories"): Loading @@ -495,10 +505,7 @@ def index(): print(request.form) search_query = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) result_container = search.search() search = start_search(search_query, request.user_plugins) except Exception as e: # log exception logger.exception('search error') Loading @@ -510,93 +517,54 @@ def index(): return index_error(), 500 # serarch images results_images = [] if search_query.categories == ['general'] and search_query.pageno == 1: search_images_engines = [] images = [] if search.categories == ['general'] and search.pageno == 1: images_engines = [] disabled_engines = request.preferences.engines.get_disabled() for engine in categories['images']: if (engine.name, 'images') not in disabled_engines: search_images_engines.append({'category': 'images', 'name': engine.name}) images_search_query = SearchQuery(search_query.query.decode('utf8'), search_images_engines, ['images'], search_query.lang, search_query.safesearch, 1, search_query.time_range) results_images_big = SearchWithPlugins(images_search_query, request.user_plugins, request).search().get_ordered_results() images_engines.append({'category': 'images', 'name': engine.name}) search_query = SearchQuery(search.query.decode('utf8'), images_engines, ['images'], search.language, search.safe_search, search.pageno, search.time_range) for image in results_images_big[:min(5, len(results_images_big))]: results_images.append(image) all_images = start_search(search_query, request.user_plugins).results # results results = result_container.get_ordered_results() number_of_results = result_container.results_number() if number_of_results < result_container.results_length(): number_of_results = 0 for image in all_images[:min(5, len(all_images))]: images.append(image) results = list(search.results) # UI advanced_search = 
request.form.get('advanced_search', None) # output config_results(results, search_query.query) config_results(results_images, search_query.query) config_results(results, search.query) config_results(images, search.query) output_format = request.form.get('format', 'html') if output_format not in ['html', 'csv', 'json', 'rss']: output_format = 'html' if output_format == 'json': return Response(json.dumps({'query': search_query.query.decode('utf-8'), 'number_of_results': number_of_results, 'results': results, 'answers': list(result_container.answers), 'corrections': list(result_container.corrections), 'infoboxes': result_container.infoboxes, 'suggestions': list(result_container.suggestions), 'unresponsive_engines': list(result_container.unresponsive_engines)}, default=lambda item: list(item) if isinstance(item, set) else item), mimetype='application/json') elif output_format == 'csv': csv = UnicodeWriter(StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') csv.writerow(keys) for row in results: row['host'] = row['parsed_url'].netloc csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) response.headers.add('Content-Disposition', cont_disp) return response elif output_format == 'rss': response_rss = render( 'opensearch_response_rss.xml', results=results, q=request.form['q'], number_of_results=number_of_results, base_url=get_base_url(), override_theme='__common__', ) return Response(response_rss, mimetype='text/xml') return render( 'results.html', results=results, q=request.form['q'], selected_categories=search_query.categories, pageno=search_query.pageno, time_range=search_query.time_range, number_of_results=format_decimal(number_of_results), selected_categories=search.categories, pageno=search.pageno, time_range=search.time_range, number_of_results=format_decimal(search.results_number), 
advanced_search=advanced_search, suggestions=result_container.suggestions, answers=result_container.answers, corrections=result_container.corrections, infoboxes=result_container.infoboxes, paging=result_container.paging, unresponsive_engines=result_container.unresponsive_engines, current_language=match_language(search_query.lang, suggestions=search.suggestions, answers=search.answers, corrections=search.corrections, infoboxes=search.infoboxes, paging=search.paging, unresponsive_engines=search.unresponsive_engines, current_language=match_language(search.language, LANGUAGE_CODES, fallback=settings['search']['language']), image_results=results_images, image_results=images, base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] Loading Loading
searx/search_database.py +75 −35 Original line number Diff line number Diff line import base64 import json import pymysql class Search(object): def __init__(self, categories, query, pageno, paging, safe_search, language, time_range, engines, results, results_number, results_length, answers, corrections, infoboxes, suggestions, unresponsive_engines): self.categories = categories self.query = query self.pageno = pageno self.paging = paging self.safe_search = safe_search self.language = language self.time_range = time_range self.engines = engines def __init__(self, search_query, results, paging, results_number, answers, corrections, infoboxes, suggestions, unresponsive_engines): self.categories = search_query.categories self.query = search_query.query self.pageno = search_query.pageno self.safe_search = search_query.safesearch self.language = search_query.lang self.time_range = search_query.time_range self.engines = search_query.engines self.results = results self.paging = paging self.results_number = results_number self.results_length = results_length self.answers = answers self.corrections = corrections self.infoboxes = infoboxes Loading @@ -24,31 +24,71 @@ class Search(object): self.unresponsive_engines = unresponsive_engines def read(categories, query, pageno, safe_search, language, time_range, engines, mysql_settings): if len(categories) != 1: def read(q, settings): time_range = q.time_range if time_range == "": time_range = "None" connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'], database=settings['database']) try: with connection.cursor() as cursor: sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \ "UNRESPONSIVE_ENGINES FROM SEARCH_HISTORY WHERE QUERY='%s' AND CATEGORIES='%s' AND PAGENO=%s AND " \ "SAFE_SEARCH=%s AND LANGUAGE='%s' AND TIME_RANGE='%s' AND ENGINES='%s'" cursor.execute( sql % (e(q.query), je(q.categories), q.pageno, q.safesearch, e(q.lang), 
time_range, je(q.engines))) for result in cursor: return Search(q, jd(result[0]), result[1] != 0, result[2], jd(result[3]), jd(result[4]), jd(result[5]), jd(result[6]), jd(result[7])) finally: connection.close() return None category = categories[0].upper().replace(" ", "_") with pymysql.connect(host=mysql_settings['host'], user=mysql_settings['user'], password=mysql_settings['password'], database=mysql_settings['database'], charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor) as connection: def save(q, r, settings): results_number = r.results_number() if results_number < r.results_length(): results_number = 0 results = r.get_ordered_results() for result in results: result['engines'] = list(result['engines']) time_range = q.time_range if time_range == "": time_range = "None" connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'], database=settings['database']) try: with connection.cursor() as cursor: sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, RESULTS_LENGTH, ANSWERS, CORRECTIONS, INFOBOXES, " \ "SUGGESTIONS, UNRESPONSIVE_ENGINES FROM %s WHERE QUERY=%s AND PAGENO=%s AND SAFE_SEARCH=%s" \ " AND LANGUAGE=%s AND TIME_RANGE=%s AND ENGINES=%s" cursor.execute(sql, (category, query, pageno, safe_search, language, time_range, str(engines).replace("'", '"'))) for result in cursor: return Search(categories, query, pageno, result[1] != 0, safe_search, language, time_range, engines, json.loads(result[0]), result[2], result[3], json.loads(result[4]), json.loads(result[5]), json.loads(result[6]), json.loads(result[7]), json.loads(result[8])) return None sql = "INSERT INTO SEARCH_HISTORY(QUERY, CATEGORIES, PAGENO, SAFE_SEARCH, LANGUAGE, TIME_RANGE, ENGINES, " \ "RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \ "UNRESPONSIVE_ENGINES) VALUES('%s', '%s', %s, %s, '%s', '%s', '%s', '%s', %s, %s, '%s', '%s', '%s'," \ " '%s', '%s')" cursor.execute(sql % (e(q.query), je(q.categories), q.pageno, 
q.safesearch, e(q.lang), time_range, je(q.engines), jle(results), r.paging, results_number, jle(r.answers), jle(r.corrections), je(r.infoboxes), jle(r.suggestions), jle(r.unresponsive_engines))) connection.commit() finally: connection.close() return Search(q, results, r.paging, results_number, r.answers, r.corrections, r.infoboxes, r.suggestions, r.unresponsive_engines) def e(uncoded): return base64.b64encode(uncoded) def d(coded): return base64.b64decode(coded) def je(uncoded): return base64.b64encode(json.dumps(uncoded)) def jle(uncoded): return base64.b64encode(json.dumps(list(uncoded))) def save(search_query): path = find_path(search_query) writer = open(path, 'w') def jd(coded): return json.loads(base64.b64decode(coded))
searx/settings.yml +6 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,12 @@ server: image_proxy : False # Proxying image results through searx http_protocol_version : "1.0" # 1.0 and 1.1 are supported mysql: host : "127.0.0.1" user : "searx" password : "password" # change this! database : "searx" ui: static_path : "" # Custom static path - leave it blank if you didn't change templates_path : "" # Custom templates path - leave it blank if you didn't change Loading
def start_search(search_query, user_plugins):
    """Return the cached Search for *search_query*, running it on a cache miss.

    Looks the query up in the MySQL cache first; when nothing is stored yet,
    runs the search (with the user's plugins) and persists the outcome.

    :param search_query: the SearchQuery to execute or fetch from cache
    :param user_plugins: plugins enabled for the current request
    :return: a searx.search_database.Search instance
    """
    search = read(search_query, settings['mysql'])
    # PEP 8: compare against None with identity, not equality.
    if search is None:
        # Cache miss: run the search and cache the result container.
        # result_container = Search(search_query).search()  # without plugins
        result_container = SearchWithPlugins(search_query, user_plugins, request).search()
        return save(search_query, result_container, settings['mysql'])
    return search
this simplifies the experience. if request.form.get("categories"): Loading @@ -495,10 +505,7 @@ def index(): print(request.form) search_query = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) result_container = search.search() search = start_search(search_query, request.user_plugins) except Exception as e: # log exception logger.exception('search error') Loading @@ -510,93 +517,54 @@ def index(): return index_error(), 500 # serarch images results_images = [] if search_query.categories == ['general'] and search_query.pageno == 1: search_images_engines = [] images = [] if search.categories == ['general'] and search.pageno == 1: images_engines = [] disabled_engines = request.preferences.engines.get_disabled() for engine in categories['images']: if (engine.name, 'images') not in disabled_engines: search_images_engines.append({'category': 'images', 'name': engine.name}) images_search_query = SearchQuery(search_query.query.decode('utf8'), search_images_engines, ['images'], search_query.lang, search_query.safesearch, 1, search_query.time_range) results_images_big = SearchWithPlugins(images_search_query, request.user_plugins, request).search().get_ordered_results() images_engines.append({'category': 'images', 'name': engine.name}) search_query = SearchQuery(search.query.decode('utf8'), images_engines, ['images'], search.language, search.safe_search, search.pageno, search.time_range) for image in results_images_big[:min(5, len(results_images_big))]: results_images.append(image) all_images = start_search(search_query, request.user_plugins).results # results results = result_container.get_ordered_results() number_of_results = result_container.results_number() if number_of_results < result_container.results_length(): number_of_results = 0 for image in all_images[:min(5, len(all_images))]: images.append(image) results = list(search.results) # 
UI advanced_search = request.form.get('advanced_search', None) # output config_results(results, search_query.query) config_results(results_images, search_query.query) config_results(results, search.query) config_results(images, search.query) output_format = request.form.get('format', 'html') if output_format not in ['html', 'csv', 'json', 'rss']: output_format = 'html' if output_format == 'json': return Response(json.dumps({'query': search_query.query.decode('utf-8'), 'number_of_results': number_of_results, 'results': results, 'answers': list(result_container.answers), 'corrections': list(result_container.corrections), 'infoboxes': result_container.infoboxes, 'suggestions': list(result_container.suggestions), 'unresponsive_engines': list(result_container.unresponsive_engines)}, default=lambda item: list(item) if isinstance(item, set) else item), mimetype='application/json') elif output_format == 'csv': csv = UnicodeWriter(StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') csv.writerow(keys) for row in results: row['host'] = row['parsed_url'].netloc csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) response.headers.add('Content-Disposition', cont_disp) return response elif output_format == 'rss': response_rss = render( 'opensearch_response_rss.xml', results=results, q=request.form['q'], number_of_results=number_of_results, base_url=get_base_url(), override_theme='__common__', ) return Response(response_rss, mimetype='text/xml') return render( 'results.html', results=results, q=request.form['q'], selected_categories=search_query.categories, pageno=search_query.pageno, time_range=search_query.time_range, number_of_results=format_decimal(number_of_results), selected_categories=search.categories, pageno=search.pageno, time_range=search.time_range, 
number_of_results=format_decimal(search.results_number), advanced_search=advanced_search, suggestions=result_container.suggestions, answers=result_container.answers, corrections=result_container.corrections, infoboxes=result_container.infoboxes, paging=result_container.paging, unresponsive_engines=result_container.unresponsive_engines, current_language=match_language(search_query.lang, suggestions=search.suggestions, answers=search.answers, corrections=search.corrections, infoboxes=search.infoboxes, paging=search.paging, unresponsive_engines=search.unresponsive_engines, current_language=match_language(search.language, LANGUAGE_CODES, fallback=settings['search']['language']), image_results=results_images, image_results=images, base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] Loading