Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dd16a6db authored by Johnny Kalajdzic's avatar Johnny Kalajdzic
Browse files

Make engine faster with a history in MySql

parent d54a30b9
Loading
Loading
Loading
Loading
+75 −35
Original line number Diff line number Diff line
import base64
import json

import pymysql


class Search(object):
    """Snapshot of a completed search: the query parameters plus its results.

    Built either from a MySQL SEARCH_HISTORY row (see ``read``) or from a
    fresh ResultContainer (see ``save``), so the web layer can render both
    the same way.
    """

    def __init__(self, search_query, results, paging, results_number,
                 answers, corrections, infoboxes, suggestions,
                 unresponsive_engines):
        # Copy the query parameters off the SearchQuery object.
        self.categories = search_query.categories
        self.query = search_query.query
        self.pageno = search_query.pageno
        self.safe_search = search_query.safesearch
        self.language = search_query.lang
        self.time_range = search_query.time_range
        self.engines = search_query.engines
        # Result payload (already plain lists/dicts, JSON-serializable).
        self.results = results
        self.paging = paging
        self.results_number = results_number
        self.answers = answers
        self.corrections = corrections
        self.infoboxes = infoboxes
        self.suggestions = suggestions
        self.unresponsive_engines = unresponsive_engines


def read(q, settings):
    """Look up a cached search in the SEARCH_HISTORY MySQL table.

    q        -- SearchQuery with .query, .categories, .pageno, .safesearch,
                .lang, .time_range and .engines
    settings -- dict with 'host', 'user', 'password', 'database' keys

    Returns a Search built from the stored row, or None on a cache miss.
    """
    time_range = q.time_range
    # An empty time range is stored as the literal string "None".
    if time_range == "":
        time_range = "None"
    connection = pymysql.connect(host=settings['host'], user=settings['user'],
                                 password=settings['password'],
                                 database=settings['database'])
    try:
        with connection.cursor() as cursor:
            # Parameterized query: the driver quotes/escapes every value,
            # instead of interpolating them into the SQL string ourselves.
            sql = "SELECT RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, INFOBOXES, " \
                  "SUGGESTIONS, UNRESPONSIVE_ENGINES FROM SEARCH_HISTORY " \
                  "WHERE QUERY=%s AND CATEGORIES=%s AND PAGENO=%s AND SAFE_SEARCH=%s " \
                  "AND LANGUAGE=%s AND TIME_RANGE=%s AND ENGINES=%s"
            cursor.execute(sql, (e(q.query), je(q.categories), q.pageno, q.safesearch,
                                 e(q.lang), time_range, je(q.engines)))
            row = cursor.fetchone()
            if row is not None:
                # PAGING is stored as 0/1, everything else base64+JSON encoded.
                return Search(q, jd(row[0]), row[1] != 0, row[2], jd(row[3]),
                              jd(row[4]), jd(row[5]), jd(row[6]), jd(row[7]))
    finally:
        connection.close()
    return None
def save(q, r, settings):
    """Insert a finished search into SEARCH_HISTORY and return it as a Search.

    q        -- SearchQuery the search was run for
    r        -- result container (presumably searx's ResultContainer) exposing
                results_number(), results_length(), get_ordered_results() and
                the paging/answers/corrections/... attributes — confirm at caller
    settings -- dict with 'host', 'user', 'password', 'database' keys
    """
    results_number = r.results_number()
    # Mirrors the webapp behavior: an inconsistent count is reported as 0.
    if results_number < r.results_length():
        results_number = 0
    results = r.get_ordered_results()
    for result in results:
        # Engine sets are not JSON-serializable; store them as lists.
        result['engines'] = list(result['engines'])
    time_range = q.time_range
    # An empty time range is stored as the literal string "None".
    if time_range == "":
        time_range = "None"

    connection = pymysql.connect(host=settings['host'], user=settings['user'],
                                 password=settings['password'],
                                 database=settings['database'])
    try:
        with connection.cursor() as cursor:
            # Parameterized INSERT: the driver quotes/escapes every value.
            sql = "INSERT INTO SEARCH_HISTORY(QUERY, CATEGORIES, PAGENO, SAFE_SEARCH, LANGUAGE, " \
                  "TIME_RANGE, ENGINES, RESULTS, PAGING, RESULTS_NUMBER, ANSWERS, CORRECTIONS, " \
                  "INFOBOXES, SUGGESTIONS, UNRESPONSIVE_ENGINES) " \
                  "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            cursor.execute(sql, (e(q.query), je(q.categories), q.pageno, q.safesearch, e(q.lang),
                                 time_range, je(q.engines), jle(results), r.paging, results_number,
                                 jle(r.answers), jle(r.corrections), je(r.infoboxes),
                                 jle(r.suggestions), jle(r.unresponsive_engines)))
            connection.commit()
    finally:
        connection.close()
    return Search(q, results, r.paging, results_number, r.answers, r.corrections,
                  r.infoboxes, r.suggestions, r.unresponsive_engines)


def e(uncoded):
    """Base64-encode a value for DB storage.

    Accepts str or bytes (Python 3's b64encode rejects str, so text is
    UTF-8 encoded first) and returns the base64 text.
    """
    if not isinstance(uncoded, bytes):
        uncoded = uncoded.encode('utf-8')
    return base64.b64encode(uncoded).decode('ascii')


def d(coded):
    """Inverse of e(): base64-decode and return the original text."""
    return base64.b64decode(coded).decode('utf-8')


def je(uncoded):
    """JSON-serialize a value, then base64-encode it for DB storage.

    Encodes the JSON text to bytes first: Python 3's b64encode rejects str.
    """
    return base64.b64encode(json.dumps(uncoded).encode('utf-8')).decode('ascii')


def jle(uncoded):
    """Like je(), but coerces any iterable (e.g. a set) to a list first.

    Needed because sets and generators are not JSON-serializable.
    """
    return base64.b64encode(json.dumps(list(uncoded)).encode('utf-8')).decode('ascii')


# NOTE(review): a leftover stub `def save(search_query)` used to sit here;
# being defined after the real save(q, r, settings) it shadowed it. Removed.
def jd(coded):
    """Inverse of je()/jle(): base64-decode, then JSON-parse."""
    return json.loads(base64.b64decode(coded))
+6 −0
Original line number Diff line number Diff line
@@ -15,6 +15,12 @@ server:
    image_proxy : False # Proxying image results through searx
    http_protocol_version : "1.0"  # 1.0 and 1.1 are supported

mysql:
    host : "127.0.0.1"
    user : "searx"
    password : "password" # change this!
    database : "searx"

ui:
    static_path : "" # Custom static path - leave it blank if you didn't change
    templates_path : "" # Custom templates path - leave it blank if you didn't change
+45 −77
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ from searx.engines import (
    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
from searx.utils import (
    UnicodeWriter, highlight_content, html_to_text, get_resources_directory,
    highlight_content, get_resources_directory,
    get_static_files, get_result_templates, get_themes, gen_useragent,
    dict_subset, prettify_url, match_language
)
@@ -74,7 +74,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
from searx.utils import new_hmac
from searx.results import ResultContainer
from searx.search_database import read, save, Search

# check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298
@@ -435,6 +435,7 @@ def pre_request():
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)


def config_results(results, query):
    for result in results:
        if 'content' in result and result['content']:
@@ -461,6 +462,7 @@ def config_results(results, query):
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])


def index_error():
    request.errors.append(gettext('search error'))
    return render(
@@ -468,6 +470,15 @@ def index_error():
    )


def start_search(search_query, user_plugins):
    """Return the cached Search for this query, or run it and cache it.

    Reads the MySQL history first; on a miss, runs the search through the
    plugin pipeline and persists the result container via save().
    """
    search = read(search_query, settings['mysql'])
    if search is None:  # `is None`, not `== None` (PEP 8)
        result_container = SearchWithPlugins(search_query, user_plugins, request).search()
        search = save(search_query, result_container, settings['mysql'])
    return search


@app.route('/search', methods=['GET', 'POST'])
@app.route('/', methods=['GET', 'POST'])
def index():
@@ -478,8 +489,7 @@ def index():
        )

    # search
    search_query = None
    result_container = None
    search = None
    try:
        # we dont want users to select multiple categories, this simplifies the experience.
        if request.form.get("categories"):
@@ -495,10 +505,7 @@ def index():
        print(request.form)

        search_query = get_search_query_from_webapp(request.preferences, request.form)

        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
        search = start_search(search_query, request.user_plugins)
    except Exception as e:
        # log exception
        logger.exception('search error')
@@ -510,93 +517,54 @@ def index():
            return index_error(), 500

    # search images
    results_images = []    
    if search_query.categories == ['general'] and search_query.pageno == 1:
        search_images_engines = []
    images = []
    if search.categories == ['general'] and search.pageno == 1:
        images_engines = []
        disabled_engines = request.preferences.engines.get_disabled()
        for engine in categories['images']:
            if (engine.name, 'images') not in disabled_engines:
                search_images_engines.append({'category': 'images', 'name': engine.name})
        images_search_query = SearchQuery(search_query.query.decode('utf8'), search_images_engines, ['images'], search_query.lang,
                                          search_query.safesearch, 1, search_query.time_range)
        results_images_big = SearchWithPlugins(images_search_query, request.user_plugins,
                                                    request).search().get_ordered_results()
                images_engines.append({'category': 'images', 'name': engine.name})

        search_query = SearchQuery(search.query.decode('utf8'), images_engines, ['images'], search.language,
                                   search.safe_search, search.pageno, search.time_range)

        for image in results_images_big[:min(5, len(results_images_big))]:
            results_images.append(image)
        all_images = start_search(search_query, request.user_plugins).results

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0
        for image in all_images[:min(5, len(all_images))]:
            images.append(image)

    results = list(search.results)

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output
    config_results(results, search_query.query)
    config_results(results_images, search_query.query)
    config_results(results, search.query)
    config_results(images, search.query)

    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')



    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        selected_categories=search.categories,
        pageno=search.pageno,
        time_range=search.time_range,
        number_of_results=format_decimal(search.results_number),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
        suggestions=search.suggestions,
        answers=search.answers,
        corrections=search.corrections,
        infoboxes=search.infoboxes,
        paging=search.paging,
        unresponsive_engines=search.unresponsive_engines,
        current_language=match_language(search.language,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        image_results=results_images,
        image_results=images,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]