From 5ba742a2e4b784441829de7271c726b304a3ff77 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Wed, 13 Oct 2021 15:16:50 -0300 Subject: [PATCH 1/5] End point for autocomplete list --- src/app.py | 14 +++++++++++--- src/searcher.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index c11afd0..dc047ac 100644 --- a/src/app.py +++ b/src/app.py @@ -1,5 +1,5 @@ from flask import Flask, json, jsonify, Response -from searcher import search_query +import searcher from indexer import connect_elastic from dotenv import load_dotenv import os @@ -15,7 +15,7 @@ es = connect_elastic(es_host, es_port) @app.route('//search/') def search(language, query): - result = search_query(es, query, language) + result = searcher.search_query(es, query, language) data = { 'search_results': result, } @@ -31,4 +31,12 @@ def search(language, query): }) resp.headers['Access-Control-Allow-Origin'] = os.getenv('ALLOW_ORIGINS', '*') - return resp \ No newline at end of file + return resp + +@app.route('//autocomplete') +def autocomplete(language): + result = searcher.autocomplete(es, language) + data = { + 'autocomplete': result, + } + return jsonify(data) \ No newline at end of file diff --git a/src/searcher.py b/src/searcher.py index 63d3ab0..ca8ab07 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -80,6 +80,38 @@ def search_query(es: Elasticsearch, user_query: str, language: str): results.append(h['_source']) return results +def autocomplete(es: Elasticsearch, language: str): + + current_hash = get_current_hash_by_language(es, language) + if current_hash == None: + return [] + + index_name = indexer.get_index_name(language, current_hash) + + if not es.indices.exists(index_name): + return [] + + query = { + "query": { + "match_all": {} + }, + "size": 10000, + "_source": ["title", "subtitles"] + } + + res = es.search(index=index_name, body=query) + results = [] + for h in res['hits']['hits']: + results.append(h['_source']['title']) + results += h['_source']['subtitles'] + + if '' in results: + results.remove('') + + results = list(set(results)) + return results + + if __name__ == "__main__": import os from indexer import connect_elastic -- GitLab From 4fcee88632efdd22ed866dc73f9a1a33fa494f1e Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 15 Oct 2021 08:55:06 -0300 Subject: [PATCH 2/5] Fix out of range index --- src/searcher.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/searcher.py b/src/searcher.py index ca8ab07..a5489fb 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -22,7 +22,13 @@ def get_current_hash(es: Elasticsearch) -> Optional[str]: if oldest_index == None: return None - return oldest_index.split('-')[1] + oldest_index_params = oldest_index.split('-') + + if len(oldest_index_params) is not 2: + print(f'Your index "{oldest_index}" is not a valid index name') + return None + + return oldest_index_params[1] def get_oldest_index(es: Elasticsearch, indexes: List[str]) -> Optional[str]: if len(indexes) == 0: -- GitLab From a5497f94cf8ade6b48cf56f54a3cfa3721f2d539 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 15 Oct 2021 13:02:36 -0300 Subject: [PATCH 3/5] Code refactor --- src/searcher.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/searcher.py b/src/searcher.py index a5489fb..6509dcb 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -53,13 +53,8 @@ def get_oldest_index(es: Elasticsearch, indexes: List[str]) -> Optional[str]: def search_query(es: Elasticsearch, user_query: str, language: str): - current_hash = get_current_hash_by_language(es, language) - if current_hash == None: - return [] - - index_name = indexer.get_index_name(language, current_hash) - - if not es.indices.exists(index_name): + index_name = get_index_name_from_lang(es=es, language=language) + if index_name is None: return [] query = { @@ -86,17 +81,26 @@ def search_query(es: Elasticsearch, user_query: str, language: str): results.append(h['_source']) return results -def autocomplete(es: Elasticsearch, language: str): +def autocomplete(es: Elasticsearch, language: str) -> List[str]: + index_name = get_index_name_from_lang(es=es, language=language) + if index_name is None: + return [] + return autocomplete_for_index(es=es, index_name=index_name) + +def get_index_name_from_lang(es: Elasticsearch, language: str) -> Optional[str]: current_hash = get_current_hash_by_language(es, language) if current_hash == None: - return [] + return None index_name = indexer.get_index_name(language, current_hash) if not es.indices.exists(index_name): - return [] + return None + + return index_name +def autocomplete_for_index(es: Elasticsearch, index_name: str) -> Optional[List[str]]: query = { "query": { "match_all": {} @@ -117,7 +121,6 @@ def autocomplete(es: Elasticsearch, language: str): results = list(set(results)) return results - if __name__ == "__main__": import os from indexer import connect_elastic -- GitLab From 3a04354bcae633a8d2dbca96dce67335c300e6a3 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 15 Oct 2021 13:18:21 -0300 Subject: [PATCH 4/5] Returning gzip on autocomplete/search --- src/app.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index dc047ac..99de8a6 100644 --- a/src/app.py +++ b/src/app.py @@ -1,5 +1,6 @@ -from flask import Flask, json, jsonify, Response +from flask import Flask, json, Response, make_response import searcher +import gzip from indexer import connect_elastic from dotenv import load_dotenv import os @@ -20,7 +21,7 @@ def search(language, query): 'search_results': result, } try: - resp = jsonify(data) + resp = gzip_json_response(data) except: resp = json.dumps({'error': 'Unable to process at the moment'}) return Response(response=resp, @@ -39,4 +40,12 @@ def autocomplete(language): data = { 'autocomplete': result, } - return jsonify(data) \ No newline at end of file + return gzip_json_response(data) + +def gzip_json_response(data: dict) -> Response: + content = gzip.compress(json.dumps(data).encode('utf8'), 5) + response = make_response(content) + response.headers['Content-length'] = len(content) + response.headers['Content-Encoding'] = 'gzip' + response.headers['Content-Type'] = 'application/json' + return response \ No newline at end of file -- GitLab From fbad69426c877111dbf5559700febf676dd39608 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 15 Oct 2021 13:41:55 -0300 Subject: [PATCH 5/5] Fix readme instruction --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 84e6ae3..6aa636d 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ docker run \ elasticsearch:7.13.3 ``` 1. Make sure you are at the virtual env (run `. venv/bin/activate`) -1. To index your content, run: `python3 src/main.py "PATH_TO_YOUR_CONTENT"` +1. To index your content, run: `python src/main.py "PATH_TO_YOUR_CONTENT"` 1. run `export FLASK_ENV=development` 1. run `export FLASK_APP=src/app` 1. run `flask run` -- GitLab