diff --git a/src/app.py b/src/app.py
index 9b88b2e2fd69e8dd651f910c0d2787d3ce3fac30..92759fd1e915357d3a97c7193d8934f559e31ad7 100644
--- a/src/app.py
+++ b/src/app.py
@@ -19,9 +19,11 @@ es = connect_elastic(es_host, es_port)
 @app.route('/<language>/search/<query>')
 def search(language, query):
     """ Endpoint to search into all contents of the specific language """
+    specific_post = searcher.search_especific_query(es, query, language)
     result = searcher.search_query(es, query, language)
     data = {
         'search_results': result,
+        'specific_post': specific_post,
     }
     try:
         resp = gzip_json_response(data)
diff --git a/src/searcher.py b/src/searcher.py
index fc67c30d5f73605ea78960b0bb25d997b4375179..24a12a12291e7c2ec50ce252157c072300381f26 100644
--- a/src/searcher.py
+++ b/src/searcher.py
@@ -2,6 +2,7 @@ import math
 from typing import List, Optional
 
 from elasticsearch.client import Elasticsearch
+import post
 import indexer
 
 
@@ -62,7 +63,72 @@ def get_oldest_index(elastic_search: Elasticsearch, indexes: List[str]) -> Optio
     return oldest_index
 
 
-def search_query(elastic_search: Elasticsearch, user_query: str, language: str):
+def search_especific_query(
+        elastic_search: Elasticsearch,
+        user_query: str,
+        language: str) -> Optional[post.Post]:
+    """ Performs a more specific search to give priority to titles
+
+    Runs exact (non-fuzzy) matches of ``user_query`` against the
+    ``title.keyword`` and ``subtitles.keyword`` fields of the index
+    resolved for ``language``. Returns the first hit, or None when no
+    index is found for the language or nothing matches.
+    """
+
+    index_name = get_index_name_from_lang(
+        elastic_search=elastic_search, language=language)
+    if index_name is None:
+        return None
+
+    # Both clauses share the same options: no fuzziness and every term
+    # required, so only exact matches of the query qualify.
+    exact_match = {
+        'query': user_query,
+        'fuzziness': 0,
+        'max_expansions': 1,
+        'minimum_should_match': '100%',
+    }
+    query = {
+        # Only the first hit is used, so do not fetch a whole page.
+        'size': 1,
+        'query': {
+            'bool': {
+                'should': [
+                    {'match': {'subtitles.keyword': dict(exact_match)}},
+                    {'match': {'title.keyword': dict(exact_match)}},
+                ]
+            },
+        },
+        'highlight': {
+            'fields': {
+                'description': {}
+            }
+        },
+        '_source': ['title', 'subtitles', 'url', 'description', 'lang', 'body']
+    }
+
+    res = elastic_search.search(index=index_name, body=query)
+    hits = res['hits']['hits']
+    if not hits:
+        return None
+
+    return _from_hit_to_post(hits[0])
+
+
+def _from_hit_to_post(hit) -> post.Post:
+    """ Converts an Elasticsearch hit into the post returned to callers
+
+    The post is the hit's ``_source``; when highlighted description
+    fragments are present they are joined and replace the description.
+    """
+    result = hit['_source']
+    fragments = hit.get('highlight', {}).get('description')
+    if fragments:
+        result['description'] = ' '.join(fragments)
+    return result
+
+
+def search_query(elastic_search: Elasticsearch, user_query: str, language: str) -> List[post.Post]:
     """ Performs the search using a query on a specific language """
 
     index_name = get_index_name_from_lang(
@@ -91,9 +157,7 @@ def search_query(elastic_search: Elasticsearch, user_query: str, language: str):
     res = elastic_search.search(index=index_name, body=query)
     results = []
     for hit in res['hits']['hits']:
-        result = hit['_source']
-        if 'highlight' in hit:
-            result['description'] = ' '.join(hit['highlight']['description'])
+        result = _from_hit_to_post(hit)
         results.append(result)
 
     return results