Commit 29eec6ed authored by Johnny Kalajdzic

Finish mysql implementation

parent dd16a6db
@@ -14,11 +14,12 @@ See the `documentation <https://asciimoo.github.io/searx>`__ and the `wiki <http
|OpenCollective searx backers|
|OpenCollective searx sponsors|
Setup MySql (No deady)
~~~~~~~~~~~~~~~~~~~~~~
Setup MySql
~~~~~~~~~~~
**Install MySql**
``$ sudo apt-get install mysql-server``
``$ sudo apt-get install mysql-server
$ pip install pymysql``
**Start MySql**
``$ sudo service mysql start
@@ -37,7 +38,7 @@ Setup MySql (No deady)
**Here are some commands to init the database**
``mysql> use searx;``
``mysql> create table SEARCH_HISTORY(QUERY varchar(512), CATEGORIES varchar(256), PAGENO int(11), PAGING tinyint(1), SAFE_SEARCH int(11), LANGUAGE varchar(8), TIME_RANGE varchar(16), ENGINES varchar(1024), RESULTS varchar(16384), RESULTS_NUMBER int(11), ANSWERS varchar(2048), CORRECTIONS varchar(256), INFOBOXES varchar(8192), SUGGESTIONS varchar(512), UNRESPONSIVE_ENGINES varchar(1024));``
``mysql> create table SEARCH_HISTORY(QUERY varchar(512), CATEGORY varchar(256), PAGENO int(11), PAGING tinyint(1), SAFE_SEARCH int(11), LANGUAGE varchar(8), TIME_RANGE varchar(16), ENGINES varchar(4096), RESULTS mediumtext, RESULTS_NUMBER int(11), ANSWERS varchar(2048), CORRECTIONS varchar(256), INFOBOXES varchar(8192), SUGGESTIONS varchar(1024), UNRESPONSIVE_ENGINES varchar(1024));``
``mysql> quit``
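A quick way to confirm the schema and credentials before starting searx is to open a connection with the same PyMySQL driver the code uses. A minimal sketch, assuming the ``searx`` user, password and database created above::

    import pymysql

    # hypothetical smoke test: adjust host and credentials to your setup
    connection = pymysql.connect(host='localhost', user='searx',
                                 password='password', database='searx')
    try:
        with connection.cursor() as cursor:
            cursor.execute("SHOW COLUMNS FROM SEARCH_HISTORY")
            for column in cursor:
                print(column[0])  # one line per column of SEARCH_HISTORY
    finally:
        connection.close()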
......
@@ -224,7 +224,7 @@ def https_url_rewrite(result):
return result
def on_result(request, search, result):
def on_result(request, searchData, result):
if result['parsed_url'].scheme == 'http':
https_url_rewrite(result)
return True
......
@@ -33,7 +33,7 @@ def get_doi_resolver(args, preference_doi_resolver):
return doi_resolver
def on_result(request, search, result):
def on_result(request, searchData, result):
doi = extract_doi(result['parsed_url'])
if doi and len(doi) < 50:
for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'):
......
@@ -28,19 +28,19 @@ p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
# attach callback to the post search hook
# request: flask request object
# ctx: the whole local context of the pre search hook
def post_search(request, search):
if search.search_query.pageno > 1:
def post_search(request, searchData):
if searchData.pageno > 1:
return True
if search.search_query.query == b'ip':
if searchData.query == b'ip':
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
if x_forwarded_for:
ip = x_forwarded_for[0]
else:
ip = request.remote_addr
search.result_container.answers.clear()
search.result_container.answers.add(ip)
elif p.match(search.search_query.query):
searchData.answers.clear()
searchData.answers.add(ip)
elif p.match(searchData.query):
ua = request.user_agent
search.result_container.answers.clear()
search.result_container.answers.add(ua)
searchData.answers.clear()
searchData.answers.add(ua)
return True
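# With searchData in place of the live result container, the plugin mutates
# the cached answers set directly: querying "ip" stores the client address
# (preferring X-Forwarded-For when behind a proxy), and a "user agent"-style
# query stores request.user_agent.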
@@ -29,7 +29,7 @@ default_on = True
preference_section = 'privacy'
def on_result(request, search, result):
def on_result(request, searchData, result):
query = result['parsed_url'].query
if query == "":
......
@@ -164,7 +164,7 @@ class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
self.query = query.encode('utf-8')
self.query = query
self.engines = engines
self.categories = categories
self.lang = lang
......
@@ -20,19 +20,21 @@ import sys
import threading
from time import time
from uuid import uuid4
from flask_babel import gettext
import requests.exceptions
from flask_babel import gettext
import searx.poolrequests as requests_lib
from searx import logger
from searx.answerers import ask
from searx.engines import (
categories, engines, settings
)
from searx.answerers import ask
from searx.utils import gen_useragent
from searx.exceptions import SearxParameterException
from searx.plugins import plugins
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException
from searx.utils import gen_useragent
try:
from thread import start_new_thread
@@ -202,7 +204,7 @@ def get_search_query_from_webapp(preferences, form):
raw_text_query.parse_query()
# set query
query = raw_text_query.getSearchQuery()
query = raw_text_query.getSearchQuery().encode('utf-8')
# get and check page number
pageno_param = form.get('pageno', '1')
@@ -253,81 +255,19 @@ def get_search_query_from_webapp(preferences, form):
query_engines = raw_text_query.engines
# query_categories
query_categories = []
# if engines are calculated from query,
# set categories by using that informations
if query_engines and raw_text_query.specific:
additional_categories = set()
for engine in query_engines:
if 'from_bang' in engine and engine['from_bang']:
additional_categories.add('none')
else:
additional_categories.add(engine['category'])
query_categories = list(additional_categories)
query_category = form.get('category')
if query_category is None:
query_category = 'general'
# otherwise, using defined categories to
# calculate which engines should be used
else:
# set categories/engines
load_default_categories = True
for pd_name, pd in form.items():
if pd_name == 'categories':
query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
elif pd_name == 'engines':
pd_engines = [{'category': engines[engine].categories[0],
'name': engine}
for engine in map(unicode.strip, pd.split(',')) if engine in engines]
if pd_engines:
query_engines.extend(pd_engines)
load_default_categories = False
elif pd_name.startswith('category_'):
category = pd_name[9:]
# if category is not found in list, skip
if category not in categories:
continue
if pd != 'off':
# add category to list
query_categories.append(category)
elif category in query_categories:
# remove category from list if property is set to 'off'
query_categories.remove(category)
if not load_default_categories:
if not query_categories:
query_categories = list(set(engine['category']
for engine in query_engines))
else:
# if no category is specified for this search,
# using user-defined default-configuration which
# (is stored in cookie)
if not query_categories:
cookie_categories = preferences.get_value('categories')
for ccateg in cookie_categories:
if ccateg in categories:
query_categories.append(ccateg)
# if still no category is specified, using general
# as default-category
if not query_categories:
query_categories = ['general']
# using all engines for that search, which are
# declared under the specific categories
for categ in query_categories:
query_engines.extend({'category': categ,
'name': engine.name}
for engine in categories[categ]
if (engine.name, categ) not in disabled_engines)
return SearchQuery(query, query_engines, query_categories,
query_lang, query_safesearch, query_pageno, query_time_range)
for engine in categories[query_category]:
if (engine.name, query_category) not in disabled_engines:
query_engines.append({'category': query_category, 'name': engine.name})
return SearchQuery(query, query_engines, [query_category], query_lang, query_safesearch, query_pageno,
query_time_range)
class Search(object):
class Search(object):
"""Search information container"""
def __init__(self, search_query):
@@ -417,7 +357,6 @@ class Search(object):
class SearchWithPlugins(Search):
"""Similar to the Search class but call the plugins."""
def __init__(self, search_query, ordered_plugin_list, request):
......
import base64
import json
import threading
import urllib
import pymysql
from searx.plugins import plugins
from searx.query import SearchQuery
from searx.search import Search, get_search_query_from_webapp
from searx.url_utils import urlparse
class Search(object):
settings = None
class SearchData(object):
def __init__(self, search_query, results, paging,
results_number, answers, corrections, infoboxes, suggestions, unresponsive_engines):
self.categories = search_query.categories
@@ -24,71 +32,135 @@ class Search(object):
self.unresponsive_engines = unresponsive_engines
def read(q, settings):
def read(q):
time_range = q.time_range
if time_range == "":
time_range = "None"
if q.time_range is None:
q.time_range = ""
connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'],
database=settings['database'])
try:
with connection.cursor() as cursor:
sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \
"UNRESPONSIVE_ENGINES FROM SEARCH_HISTORY WHERE QUERY='%s' AND CATEGORIES='%s' AND PAGENO=%s AND " \
"UNRESPONSIVE_ENGINES FROM SEARCH_HISTORY WHERE QUERY='%s' AND CATEGORY='%s' AND PAGENO=%s AND " \
"SAFE_SEARCH=%s AND LANGUAGE='%s' AND TIME_RANGE='%s' AND ENGINES='%s'"
cursor.execute(
sql % (e(q.query), je(q.categories), q.pageno, q.safesearch, e(q.lang), time_range, je(q.engines)))
for result in cursor:
return Search(q, jd(result[0]), result[1] != 0, result[2], jd(result[3]),
jd(result[4]), jd(result[5]), jd(result[6]), jd(result[7]))
sql % (e(q.query), q.categories[0], q.pageno, q.safesearch, q.lang, time_range, je(q.engines)))
for response in cursor:
results = jd(response[0])
for result in results:
result['parsed_url'] = urlparse(result['url'])
return SearchData(q, results, response[1] != 0, response[2], jds(response[3]),
jds(response[4]), jd(response[5]), jds(response[6]), jds(response[7]))
finally:
connection.close()
return None
def save(q, r, settings):
results_number = r.results_number()
if results_number < r.results_length():
results_number = 0
results = r.get_ordered_results()
for result in results:
result['engines'] = list(result['engines'])
time_range = q.time_range
if time_range == "":
time_range = "None"
def save(d):
connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'],
database=settings['database'])
try:
with connection.cursor() as cursor:
sql = "INSERT INTO SEARCH_HISTORY(QUERY, CATEGORIES, PAGENO, SAFE_SEARCH, LANGUAGE, TIME_RANGE, ENGINES, " \
sql = "INSERT INTO SEARCH_HISTORY(QUERY, CATEGORY, PAGENO, SAFE_SEARCH, LANGUAGE, TIME_RANGE, ENGINES, " \
"RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, SUGGESTIONS, " \
"UNRESPONSIVE_ENGINES) VALUES('%s', '%s', %s, %s, '%s', '%s', '%s', '%s', %s, %s, '%s', '%s', '%s'," \
" '%s', '%s')"
cursor.execute(sql % (e(q.query), je(q.categories), q.pageno, q.safesearch, e(q.lang), time_range,
je(q.engines), jle(results), r.paging, results_number, jle(r.answers),
jle(r.corrections), je(r.infoboxes), jle(r.suggestions), jle(r.unresponsive_engines)))
cursor.execute(sql % (e(d.query), d.categories[0], d.pageno, d.safe_search, d.language, d.time_range,
je(d.engines), je(d.results), d.paging, d.results_number, jes(d.answers),
jes(d.corrections), je(d.infoboxes), jes(d.suggestions), jes(d.unresponsive_engines)))
connection.commit()
finally:
connection.close()
return Search(q, results, r.paging, results_number, r.answers, r.corrections,
r.infoboxes, r.suggestions, r.unresponsive_engines)
def e(uncoded):
return base64.b64encode(uncoded)
def get_twenty_queries(x):
result = []
connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'],
database=settings['database'])
try:
with connection.cursor() as cursor:
cursor.execute("SELECT QUERY, ENGINES, CATEGORY, LANGUAGE , SAFE_SEARCH, PAGENO, TIME_RANGE FROM "
"SEARCH_HISTORY LIMIT %s,20" % x)
for row in cursor:
result.append(SearchQuery(d(row[0]), jd(row[1]), [row[2]], row[3], row[4], row[5], row[6]))
finally:
connection.close()
return result
def d(coded):
return base64.b64decode(coded)
def e(obj):
return urllib.quote_plus(obj)
def je(uncoded):
return base64.b64encode(json.dumps(uncoded))
def d(coded):
return urllib.unquote_plus(coded)
def jle(uncoded):
return base64.b64encode(json.dumps(list(uncoded)))
def je(obj):
return e(json.dumps(obj))
def jd(coded):
return json.loads(base64.b64decode(coded))
return json.loads(d(coded))
def jes(set):
return je(list(set))
def jds(coded):
return set(jd(coded))
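# Round-trip sketch for the helpers above: values are URL-quoted (e/je) so
# they can be embedded in the SQL statement strings, and reversed with d/jd;
# sets pass through jes/jds via an intermediate list. Hypothetical values:
#   je({'q': 'ip'})      -> '%7B%22q%22%3A+%22ip%22%7D'
#   jd(je({'q': 'ip'}))  -> {'q': 'ip'}
#   jds(jes({'a', 'b'})) -> set(['a', 'b'])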
def get_search_data(q, r):
results_number = r.results_number()
if results_number < r.results_length():
results_number = 0
results = r.get_ordered_results()
for result in results:
result['engines'] = list(result['engines'])
if not isinstance(result['engines'], list):
print(result['engines'])
if 'publishedDate' in result:
try:  # strftime raises ValueError for dates before 1900 (datetime module bug)
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S')
except ValueError:
pass
finally:
result['publishedDate'] = None  # drop the datetime object, it is not JSON serializable
if q.time_range is None:
q.time_range = ""
return SearchData(q, results, r.paging, results_number, r.answers, r.corrections,
r.infoboxes, r.suggestions, r.unresponsive_engines)
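# search() is a read-through cache: return the stored SearchData when the
# query has been seen before, otherwise run a live Search, snapshot it with
# get_search_data() and persist it from a background thread so the request
# does not block on the INSERT. Plugins run against the SearchData either way.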
def search(request):
search_query = get_search_query_from_webapp(request.preferences, request.form)
searchData = read(search_query)
if searchData is None:
result_container = Search(search_query).search()
searchData = get_search_data(search_query, result_container)
threading.Thread(target=save, args=(searchData,), name='save_search_' + str(searchData)).start()
ordered_plugin = request.user_plugins
plugins.call(ordered_plugin, 'post_search', request, searchData)
for result in searchData.results:
plugins.call(ordered_plugin, 'on_result', request, searchData, result)
return searchData
def update(d):
connection = pymysql.connect(host=settings['host'], user=settings['user'], password=settings['password'],
database=settings['database'])
try:
with connection.cursor() as cursor:
sql = "UPDATE SEARCH_HISTORY SET RESULTS='%s', PAGING=%s, RESULTS_NUMBER=%s, ANSWERS='%s', CORRECTIONS='%s', INFOBOXES='%s', SUGGESTIONS='%s', " \
"UNRESPONSIVE_ENGINES='%s' WHERE QUERY='%s' AND CATEGORY='%s' AND PAGENO=%s AND " \
"SAFE_SEARCH=%s AND LANGUAGE='%s' AND TIME_RANGE='%s' AND ENGINES='%s'"
cursor.execute(sql % (je(d.results), d.paging, d.results_number, jes(d.answers), jes(d.corrections),
je(d.infoboxes), jes(d.suggestions), jes(d.unresponsive_engines),
e(d.query), d.categories[0], d.pageno, d.safe_search, d.language, d.time_range,
je(d.engines)))
connection.commit()
finally:
connection.close()
general:
debug : debug # Debug mode, only for development
debug : False # Debug mode, only for development
instance_name : "eelo" # displayed name
search:
@@ -20,6 +20,7 @@ mysql:
user : "searx"
password : "password" # change this!
database : "searx"
upgrade_history: 86400 # in seconds (1 day = 86400s)
ui:
static_path : "" # Custom static path - leave it blank if you didn't change
......
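The ``upgrade_history`` value added above is the refresh period, in seconds, of the background updater introduced in webapp.py further down. A minimal sketch of the cadence it implies, assuming the ``settings`` dict loaded from this file::

    import time

    def seconds_until_refresh(start_time, settings):
        # upgrade_history is a period in seconds (86400 = one day); the
        # updater sleeps out whatever is left of the current window
        elapsed = int(time.time() - start_time)
        return max(0, settings['mysql']['upgrade_history'] - elapsed)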
@@ -87,7 +87,7 @@
<input type="radio" class="tab-radio" id="tab_engines_r" value="engines" name="tab">
<div id="tab_engines" class="tab-content">
{% for categ in ['general', 'images'] %}
{% for categ in categories %}
<!-- <input type="radio" class="tab-radio" id="" value="" name="engines_category_tab"> -->
<section id="engines">
<h3>{{categ}}</h3>
......
@@ -28,10 +28,12 @@ import hmac
import json
import os
import sys
import time
import atexit
import requests
from searx import logger
from searx import logger, search_database
logger = logger.getChild('webapp')
@@ -65,8 +67,8 @@ from searx.utils import (
)
from searx.version import VERSION_STRING
from searx.languages import language_codes as languages
from searx.search import SearchWithPlugins, get_search_query_from_webapp
from searx.query import RawTextQuery, SearchQuery
from searx.search import Search
from searx.query import RawTextQuery
from searx.autocomplete import searx_bang, backends as autocomplete_backends
from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
@@ -74,7 +76,8 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
from searx.utils import new_hmac
from searx.search_database import read, save, Search
from searx.search_database import get_twenty_queries, search
import threading
# check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298
@@ -443,24 +446,19 @@ def config_results(results, query):
result['title'] = highlight_content(escape(result['title'] or u''), query)
result['pretty_url'] = prettify_url(result['url'])
# TODO, check if timezone is calculated right
if 'publishedDate' in result:
try: # test if publishedDate >= 1900 (datetime module bug)
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
except ValueError:
result['publishedDate'] = None
else:
if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
else:
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(
hours=hours, minutes=minutes) # noqa
if 'pubdate' in result:
publishedDate = datetime.strptime(result['pubdate'], '%Y-%m-%d %H:%M:%S')
if publishedDate >= datetime.now() - timedelta(days=1):
timedifference = datetime.now() - publishedDate
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
else:
result['publishedDate'] = format_date(result['publishedDate'])
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(
hours=hours, minutes=minutes) # noqa
else:
result['publishedDate'] = format_date(publishedDate)
def index_error():
@@ -470,15 +468,6 @@ def index_error():
)
def start_search(search_query, user_plugins):
search = read(search_query, settings['mysql'])
if search == None:
# result_container = Search(search_query).search() # without plugins
result_container = SearchWithPlugins(search_query, user_plugins, request).search()
return save(search_query, result_container, settings['mysql'])
return search
@app.route('/search', methods=['GET', 'POST'])
@app.route('/', methods=['GET', 'POST'])
def index():
@@ -489,23 +478,9 @@ def index():
)
# search
search = None
searchData = None
try:
# we don't want users to select multiple categories; this simplifies the experience.
if request.form.get("categories"):
request.form["categories"] = "general"
if request.form.get("category"):
for k, v in request.form.items():
if k.startswith("category_"):
request.form.pop(k, None)
request.form["category_" + request.form['category']] = u"On"
# else:
# request.form["category_general"] = u"On"
print(request.form)
search_query = get_search_query_from_webapp(request.preferences, request.form)
search = start_search(search_query, request.user_plugins)
searchData = search(request)
except Exception as e:
# log exception
logger.exception('search error')
@@ -516,55 +491,45 @@ def index():
else:
return index_error(), 500
# serarch images
# search 5 images and 5 videos
images = []
if search.categories == ['general'] and search.pageno == 1:
images_engines = []
disabled_engines = request.preferences.engines.get_disabled()
for engine in categories['images']:
if (engine.name, 'images') not in disabled_engines:
images_engines.append({'category': 'images', 'name': engine.name})
search_query = SearchQuery(search.query.decode('utf8'), images_engines, ['images'], search.language,
search.safe_search, search.pageno, search.time_range)
all_images = start_search(search_query, request.user_plugins).results
videos = []
if searchData.categories == ['general'] and searchData.pageno == 1:
request.form['category'] = 'images'
all_images = search(request).results
for image in all_images[:min(5, len(all_images))]:
images.append(image)
results = list(search.results)
# UI
advanced_search = request.form.get('advanced_search', None)
request.form['category'] = 'videos'
all_videos = search(request).results
for video in all_videos[:min(5, len(all_videos))]:
videos.append(video)
# output
config_results(results, search.query)
config_results(images, search.query)
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
config_results(searchData.results, searchData.query)
config_results(images, searchData.query)
config_results(videos, searchData.query)
return render(
'results.html',
results=results,
q=request.form['q'],
selected_categories=search.categories,
pageno=search.pageno,
time_range=search.time_range,
number_of_results=format_decimal(search.results_number),
advanced_search=advanced_search,
suggestions=search.suggestions,
answers=search.answers,
corrections=search.corrections,
infoboxes=search.infoboxes,
paging=search.paging,
unresponsive_engines=search.unresponsive_engines,
current_language=match_language(search.language,
results=searchData.results,
q=searchData.query.decode('utf-8'),
selected_categories=searchData.categories,
pageno=searchData.pageno,
time_range=searchData.time_range,
number_of_results=format_decimal(searchData.results_number),
advanced_search=request.form.get('advanced_search', None),
suggestions=searchData.suggestions,
answers=searchData.answers,
corrections=searchData.corrections,
infoboxes=searchData.infoboxes,
paging=searchData.paging,
unresponsive_engines=searchData.unresponsive_engines,
current_language=match_language(searchData.language,
LANGUAGE_CODES,
fallback=settings['search']['language']),
image_results=images,
videos_results=videos,
base_url=get_base_url(),
theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -582,7 +547,6 @@ def about():
@app.route('/autocompleter', methods=['GET', 'POST'])
def autocompleter():
"""Return autocompleter results"""
# set blocked engines
disabled_engines = request.preferences.engines.get_disabled()
@@ -843,15 +807,47 @@ def page_not_found(e):
return render('404.html'), 404
running = threading.Event()
def wait_updating(start_time):
wait = settings['mysql']['upgrade_history'] - int(time.time() - start_time)
if wait > 0:
running.wait(wait)
def update_results():
start_time = time.time()
x = 0
while not running.is_set():
queries = get_twenty_queries(x)
for query in queries:
result_container = Search(query).search()
searchData = search_database.get_search_data(query, result_container)
search_database.update(searchData)
if running.is_set():
return
x += 20
if len(queries) < 20:
x = 0
wait_updating(start_time)
start_time = time.time()
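# The updater walks SEARCH_HISTORY twenty rows at a time (LIMIT x,20),
# re-running each stored query and rewriting its row. A short page marks the
# end of the table: the offset resets to 0 and the loop sleeps out what is
# left of the upgrade_history window before the next pass. running.set(),
# called at shutdown, stops the loop between batches.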
def run():
logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
search_database.settings = settings['mysql']
threading.Thread(target=update_results, name='results_updater').start()
print "engine server starting"
app.run(
debug=searx_debug,
use_debugger=searx_debug,
port=settings['server']['port'],
host=settings['server']['bind_address'],
threaded=True
threaded=False
)
print "wait for shutdown..."
running.set()
class ReverseProxyPathFix(object):
......