From 935859275d02ae068a1f64516ac7261288ca8976 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Thu, 16 Dec 2021 11:41:08 -0300 Subject: [PATCH 1/4] WIP: Search very specific only for titles and subtitles --- src/searcher.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/searcher.py b/src/searcher.py index fc67c30..cfb17c5 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -70,6 +70,40 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str): if index_name is None: return [] + query = { + 'query': { + 'bool': { + 'should': [ + { + 'match_phrase': { + 'subtitles': user_query, + } + }, + { + 'match_phrase': { + 'title': 'Easy Installer - FAQ', + } + } + ] + }, + }, + 'highlight': { + 'fields': { + 'description': {} + } + }, + '_source': ['title', 'subtitles', 'url', 'description', 'lang', 'body'] + } + + res = elastic_search.search(index=index_name, body=query) + + results = [] + for hit in res['hits']['hits']: + result = hit['_source'] + if 'highlight' in hit: + result['description'] = ' '.join(hit['highlight']['description']) + results.append(result) + query = { 'query': { 'multi_match': { @@ -89,11 +123,13 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str): } res = elastic_search.search(index=index_name, body=query) - results = [] for hit in res['hits']['hits']: result = hit['_source'] if 'highlight' in hit: result['description'] = ' '.join(hit['highlight']['description']) + + if result in results: + continue results.append(result) return results -- GitLab From a09cd80871fd9f32f27be3a8d7087bcec3238f2e Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Thu, 16 Dec 2021 18:53:48 -0300 Subject: [PATCH 2/4] API sending out important post --- src/app.py | 2 ++ src/searcher.py | 49 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/app.py b/src/app.py index 9b88b2e..92759fd 100644 --- a/src/app.py +++ b/src/app.py @@ -19,9 +19,11 @@ es = connect_elastic(es_host, es_port) @app.route('//search/') def search(language, query): """ Endpoint to search into all contents of the specific language """ + specific_post = searcher.search_especific_query(es, query, language) result = searcher.search_query(es, query, language) data = { 'search_results': result, + 'specific_post': specific_post, } try: resp = gzip_json_response(data) diff --git a/src/searcher.py b/src/searcher.py index cfb17c5..b695a1e 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -2,6 +2,7 @@ import math from typing import List, Optional from elasticsearch.client import Elasticsearch +import post import indexer @@ -62,26 +63,39 @@ def get_oldest_index(elastic_search: Elasticsearch, indexes: List[str]) -> Optio return oldest_index -def search_query(elastic_search: Elasticsearch, user_query: str, language: str): - """ Performs the search using a query on a specific language """ +def search_especific_query( + elastic_search: Elasticsearch, + user_query: str, + language: str) -> Optional[post.Post]: + """ Performs a more specific search to give priority to titles """ index_name = get_index_name_from_lang( elastic_search=elastic_search, language=language) if index_name is None: - return [] + return None query = { 'query': { 'bool': { 'should': [ { - 'match_phrase': { - 'subtitles': user_query, + 'match': { + 'subtitles': { + 'query': user_query, + 'fuzziness': 0, + 'max_expansions': 1, + 'minimum_should_match': '100%', + }, } }, { - 'match_phrase': { - 'title': 'Easy Installer - FAQ', + 'match': { + 'title': { + 'query': user_query, + 'fuzziness': 0, + 'max_expansions': 1, + 'minimum_should_match': '100%', + }, } } ] @@ -97,12 +111,25 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str): res = elastic_search.search(index=index_name, body=query) - results = [] + if len(res['hits']['hits']) == 0: + return None + for hit in res['hits']['hits']: result = hit['_source'] if 'highlight' in hit: result['description'] = ' '.join(hit['highlight']['description']) - results.append(result) + return result + + return None + + +def search_query(elastic_search: Elasticsearch, user_query: str, language: str) -> List[post.Post]: + """ Performs the search using a query on a specific language """ + + index_name = get_index_name_from_lang( + elastic_search=elastic_search, language=language) + if index_name is None: + return [] query = { 'query': { @@ -123,13 +150,11 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str): } res = elastic_search.search(index=index_name, body=query) + results = [] for hit in res['hits']['hits']: result = hit['_source'] if 'highlight' in hit: result['description'] = ' '.join(hit['highlight']['description']) - - if result in results: - continue results.append(result) return results -- GitLab From 66e7692f7649cceb065e3c96ff5fab00a9900cd1 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 17 Dec 2021 13:48:11 -0300 Subject: [PATCH 3/4] Fix cases where content is similar --- src/searcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/searcher.py b/src/searcher.py index b695a1e..3235497 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -80,7 +80,7 @@ def search_especific_query( 'should': [ { 'match': { - 'subtitles': { + 'subtitles.keyword': { 'query': user_query, 'fuzziness': 0, 'max_expansions': 1, @@ -90,7 +90,7 @@ def search_especific_query( }, { 'match': { - 'title': { + 'title.keyword': { 'query': user_query, 'fuzziness': 0, 'max_expansions': 1, -- GitLab From 118478ec9a09a3638b15c820648fa3d62a85371c Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 17 Dec 2021 14:36:16 -0300 Subject: [PATCH 4/4] Refactoring repeated operation --- src/searcher.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/searcher.py b/src/searcher.py index 3235497..24a12a1 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -115,14 +115,18 @@ def search_especific_query( return None for hit in res['hits']['hits']: - result = hit['_source'] - if 'highlight' in hit: - result['description'] = ' '.join(hit['highlight']['description']) - return result + return _from_hit_to_post(hit) return None +def _from_hit_to_post(hit) -> post.Post: + result = hit['_source'] + if 'highlight' in hit: + result['description'] = ' '.join(hit['highlight']['description']) + return result + + def search_query(elastic_search: Elasticsearch, user_query: str, language: str) -> List[post.Post]: """ Performs the search using a query on a specific language """ @@ -152,9 +156,7 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str) res = elastic_search.search(index=index_name, body=query) results = [] for hit in res['hits']['hits']: - result = hit['_source'] - if 'highlight' in hit: - result['description'] = ' '.join(hit['highlight']['description']) + result = _from_hit_to_post(hit) results.append(result) return results -- GitLab