Commit dd16a6db authored by Johnny Kalajdzic's avatar Johnny Kalajdzic

Make the engine faster with a search history cache in MySQL

parent d54a30b9
import base64
import json
import pymysql
class Search(object):
    """Immutable snapshot of a completed search: the query parameters plus
    every piece of output the UI needs (results, answers, infoboxes, ...).

    Instances are built either from a live ``ResultContainer`` (see ``save``)
    or from a row previously cached in MySQL (see ``read``).
    """

    def __init__(self, search_query, results, paging, results_number,
                 answers, corrections, infoboxes, suggestions, unresponsive_engines):
        """Copy the query parameters from *search_query* and attach results.

        :param search_query: object exposing ``categories``, ``query``,
            ``pageno``, ``safesearch``, ``lang``, ``time_range``, ``engines``
            (a SearchQuery — assumed, confirm against caller).
        :param results: list of result dicts.
        :param paging: truthy when more result pages are available.
        :param results_number: total result count reported by the engines.
        :param answers: direct answers.
        :param corrections: spelling corrections.
        :param infoboxes: infobox dicts.
        :param suggestions: query suggestions.
        :param unresponsive_engines: engines that failed to answer.
        """
        self.categories = search_query.categories
        self.query = search_query.query
        self.pageno = search_query.pageno
        self.safe_search = search_query.safesearch
        self.language = search_query.lang
        self.time_range = search_query.time_range
        self.engines = search_query.engines
        self.results = results
        self.paging = paging
        self.results_number = results_number
        self.answers = answers
        self.corrections = corrections
        self.infoboxes = infoboxes
        self.suggestions = suggestions
        self.unresponsive_engines = unresponsive_engines
def read(q, settings):
    """Look up a previously cached search in the MySQL SEARCH_HISTORY table.

    :param q: query object exposing ``query``, ``categories``, ``pageno``,
        ``safesearch``, ``lang``, ``time_range``, ``engines``
        (a SearchQuery — assumed, confirm against caller).
    :param settings: dict with MySQL connection info
        (``host``, ``user``, ``password``, ``database``).
    :return: a ``Search`` built from the cached row, or ``None`` on a miss.
    """
    # An empty time range is stored as the literal string "None" in the table.
    time_range = q.time_range
    if time_range == "":
        time_range = "None"
    connection = pymysql.connect(host=settings['host'], user=settings['user'],
                                 password=settings['password'],
                                 database=settings['database'])
    try:
        with connection.cursor() as cursor:
            # Parameterized query: pymysql escapes/quotes every value, which
            # replaces the previous injection-prone "%s"-string formatting.
            sql = ("SELECT RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, "
                   "INFOBOXES, SUGGESTIONS, UNRESPONSIVE_ENGINES "
                   "FROM SEARCH_HISTORY "
                   "WHERE QUERY=%s AND CATEGORIES=%s AND PAGENO=%s AND SAFE_SEARCH=%s "
                   "AND LANGUAGE=%s AND TIME_RANGE=%s AND ENGINES=%s")
            cursor.execute(sql, (e(q.query), je(q.categories), q.pageno,
                                 q.safesearch, e(q.lang), time_range,
                                 je(q.engines)))
            row = cursor.fetchone()
            if row is not None:
                # Column order matches the SELECT list above; PAGING is stored
                # as an integer flag, the rest are base64-encoded JSON blobs.
                return Search(q, jd(row[0]), row[1] != 0, row[2], jd(row[3]),
                              jd(row[4]), jd(row[5]), jd(row[6]), jd(row[7]))
    finally:
        connection.close()
    return None
def save(q, r, settings):
    """Persist a finished search into the MySQL SEARCH_HISTORY table.

    :param q: query object exposing ``query``, ``categories``, ``pageno``,
        ``safesearch``, ``lang``, ``time_range``, ``engines``.
    :param r: result container exposing ``results_number()``,
        ``results_length()``, ``get_ordered_results()``, ``paging``,
        ``answers``, ``corrections``, ``infoboxes``, ``suggestions``,
        ``unresponsive_engines`` (a ResultContainer — assumed, confirm).
    :param settings: dict with MySQL connection info.
    :return: a ``Search`` mirroring what was stored, for immediate rendering.
    """
    # A reported total smaller than the fetched count is meaningless; store 0.
    results_number = r.results_number()
    if results_number < r.results_length():
        results_number = 0
    results = r.get_ordered_results()
    for result in results:
        # 'engines' is a set, which is not JSON-serializable.
        result['engines'] = list(result['engines'])
    # An empty time range is stored as the literal string "None" in the table.
    time_range = q.time_range
    if time_range == "":
        time_range = "None"
    connection = pymysql.connect(host=settings['host'], user=settings['user'],
                                 password=settings['password'],
                                 database=settings['database'])
    try:
        with connection.cursor() as cursor:
            # Parameterized INSERT: pymysql escapes/quotes every value, which
            # replaces the previous injection-prone "%s"-string formatting.
            sql = ("INSERT INTO SEARCH_HISTORY(QUERY, CATEGORIES, PAGENO, "
                   "SAFE_SEARCH, LANGUAGE, TIME_RANGE, ENGINES, RESULTS, "
                   "PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, "
                   "SUGGESTIONS, UNRESPONSIVE_ENGINES) "
                   "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
                   "%s, %s, %s)")
            cursor.execute(sql, (e(q.query), je(q.categories), q.pageno,
                                 q.safesearch, e(q.lang), time_range,
                                 je(q.engines), jle(results), r.paging,
                                 results_number, jle(r.answers),
                                 jle(r.corrections), je(r.infoboxes),
                                 jle(r.suggestions),
                                 jle(r.unresponsive_engines)))
        connection.commit()
    finally:
        connection.close()
    return Search(q, results, r.paging, results_number, r.answers,
                  r.corrections, r.infoboxes, r.suggestions,
                  r.unresponsive_engines)
def e(uncoded):
    """Base64-encode *uncoded* and return an ASCII ``str``.

    Accepts ``str`` (encoded as UTF-8 first) or ``bytes``. The previous
    bytes-only version raised ``TypeError`` for ``str`` input on Python 3,
    and its ``bytes`` return value broke ``'%s'`` interpolation at call
    sites (producing ``b'...'`` inside the SQL text).
    """
    if not isinstance(uncoded, bytes):
        uncoded = uncoded.encode('utf-8')
    return base64.b64encode(uncoded).decode('ascii')
def d(coded):
    """Inverse of ``e``: base64-decode *coded* and return a UTF-8 ``str``.

    Decoding to ``str`` (rather than returning raw ``bytes``) keeps the
    round-trip ``d(e(x)) == x`` for string input, matching ``e``'s fix.
    """
    return base64.b64decode(coded).decode('utf-8')
def je(uncoded):
    """JSON-serialize *uncoded*, base64-encode it, return an ASCII ``str``.

    ``json.dumps`` returns ``str``, which must be encoded to bytes before
    ``b64encode`` on Python 3 — the previous version raised ``TypeError``.
    """
    return base64.b64encode(json.dumps(uncoded).encode('utf-8')).decode('ascii')
def jle(uncoded):
    """Like ``je`` but materializes any iterable (e.g. a ``set``) to a list
    first, since sets are not JSON-serializable.

    ``json.dumps`` returns ``str``, which must be encoded to bytes before
    ``b64encode`` on Python 3 — the previous version raised ``TypeError``.
    """
    return base64.b64encode(json.dumps(list(uncoded)).encode('utf-8')).decode('ascii')
def jd(coded):
    """Inverse of ``je``/``jle``: base64-decode *coded*, then parse the
    JSON payload it contains."""
    payload = base64.b64decode(coded)
    return json.loads(payload)
......@@ -15,6 +15,12 @@ server:
image_proxy : False # Proxying image results through searx
http_protocol_version : "1.0" # 1.0 and 1.1 are supported
mysql:
host : "127.0.0.1"
user : "searx"
password : "password" # change this!
database : "searx"
ui:
static_path : "" # Custom static path - leave it blank if you didn't change
templates_path : "" # Custom templates path - leave it blank if you didn't change
......
......@@ -59,7 +59,7 @@ from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
from searx.utils import (
UnicodeWriter, highlight_content, html_to_text, get_resources_directory,
highlight_content, get_resources_directory,
get_static_files, get_result_templates, get_themes, gen_useragent,
dict_subset, prettify_url, match_language
)
......@@ -74,7 +74,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
from searx.utils import new_hmac
from searx.results import ResultContainer
from searx.search_database import read, save, Search
# check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298
......@@ -435,6 +435,7 @@ def pre_request():
or plugin.id in allowed_plugins):
request.user_plugins.append(plugin)
def config_results(results, query):
for result in results:
if 'content' in result and result['content']:
......@@ -461,11 +462,21 @@ def config_results(results, query):
else:
result['publishedDate'] = format_date(result['publishedDate'])
def index_error():
    """Record a generic search error on the request and re-render the index
    page (callers wrap this in their own status code, e.g. 500)."""
    request.errors.append(gettext('search error'))
    return render(
        'index.html',
    )
def start_search(search_query, user_plugins):
    """Return the cached ``Search`` for *search_query* from MySQL, or run
    the search (with plugins) and cache its results.

    :param search_query: the parsed query to execute or look up.
    :param user_plugins: plugins enabled for the current request.
    :return: a ``Search`` instance, either cached or freshly computed.
    """
    search = read(search_query, settings['mysql'])
    if search is None:
        # Cache miss: run the real search and persist it; ``save`` returns
        # the equivalent Search object for immediate rendering.
        result_container = SearchWithPlugins(search_query, user_plugins, request).search()
        return save(search_query, result_container, settings['mysql'])
    return search
@app.route('/search', methods=['GET', 'POST'])
......@@ -478,8 +489,7 @@ def index():
)
# search
search_query = None
result_container = None
search = None
try:
# we dont want users to select multiple categories, this simplifies the experience.
if request.form.get("categories"):
......@@ -495,10 +505,7 @@ def index():
print(request.form)
search_query = get_search_query_from_webapp(request.preferences, request.form)
# search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request.user_plugins, request)
result_container = search.search()
search = start_search(search_query, request.user_plugins)
except Exception as e:
# log exception
logger.exception('search error')
......@@ -510,93 +517,54 @@ def index():
return index_error(), 500
# search images
results_images = []
if search_query.categories == ['general'] and search_query.pageno == 1:
search_images_engines = []
images = []
if search.categories == ['general'] and search.pageno == 1:
images_engines = []
disabled_engines = request.preferences.engines.get_disabled()
for engine in categories['images']:
if (engine.name, 'images') not in disabled_engines:
search_images_engines.append({'category': 'images', 'name': engine.name})
images_search_query = SearchQuery(search_query.query.decode('utf8'), search_images_engines, ['images'], search_query.lang,
search_query.safesearch, 1, search_query.time_range)
results_images_big = SearchWithPlugins(images_search_query, request.user_plugins,
request).search().get_ordered_results()
images_engines.append({'category': 'images', 'name': engine.name})
search_query = SearchQuery(search.query.decode('utf8'), images_engines, ['images'], search.language,
search.safe_search, search.pageno, search.time_range)
for image in results_images_big[:min(5, len(results_images_big))]:
results_images.append(image)
all_images = start_search(search_query, request.user_plugins).results
# results
results = result_container.get_ordered_results()
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
for image in all_images[:min(5, len(all_images))]:
images.append(image)
results = list(search.results)
# UI
advanced_search = request.form.get('advanced_search', None)
# output
config_results(results, search_query.query)
config_results(results_images, search_query.query)
config_results(results, search.query)
config_results(images, search.query)
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
if output_format == 'json':
return Response(json.dumps({'query': search_query.query.decode('utf-8'),
'number_of_results': number_of_results,
'results': results,
'answers': list(result_container.answers),
'corrections': list(result_container.corrections),
'infoboxes': result_container.infoboxes,
'suggestions': list(result_container.suggestions),
'unresponsive_engines': list(result_container.unresponsive_engines)},
default=lambda item: list(item) if isinstance(item, set) else item),
mimetype='application/json')
elif output_format == 'csv':
csv = UnicodeWriter(StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
csv.writerow(keys)
for row in results:
row['host'] = row['parsed_url'].netloc
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
response.headers.add('Content-Disposition', cont_disp)
return response
elif output_format == 'rss':
response_rss = render(
'opensearch_response_rss.xml',
results=results,
q=request.form['q'],
number_of_results=number_of_results,
base_url=get_base_url(),
override_theme='__common__',
)
return Response(response_rss, mimetype='text/xml')
return render(
'results.html',
results=results,
q=request.form['q'],
selected_categories=search_query.categories,
pageno=search_query.pageno,
time_range=search_query.time_range,
number_of_results=format_decimal(number_of_results),
selected_categories=search.categories,
pageno=search.pageno,
time_range=search.time_range,
number_of_results=format_decimal(search.results_number),
advanced_search=advanced_search,
suggestions=result_container.suggestions,
answers=result_container.answers,
corrections=result_container.corrections,
infoboxes=result_container.infoboxes,
paging=result_container.paging,
unresponsive_engines=result_container.unresponsive_engines,
current_language=match_language(search_query.lang,
suggestions=search.suggestions,
answers=search.answers,
corrections=search.corrections,
infoboxes=search.infoboxes,
paging=search.paging,
unresponsive_engines=search.unresponsive_engines,
current_language=match_language(search.language,
LANGUAGE_CODES,
fallback=settings['search']['language']),
image_results=results_images,
image_results=images,
base_url=get_base_url(),
theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment