Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 52d23504 authored by Israel Yago Pereira
Browse files

Search by language

parent 281cb33a
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
from flask import Flask, jsonify
from flask import Flask, jsonify, request
from searcher import search_query
from indexer import connect_elastic
from dotenv import load_dotenv
@@ -13,9 +13,9 @@ es_port = os.getenv('ES_PORT', 9200)

es = connect_elastic(es_host, es_port)

@app.route('/search/<query>')
def search(query):
    result = search_query(es, query)
@app.route('/<language>/search/<query>')
def search(language, query):
    result = search_query(es, query, language)
    data = {
        'search_results': result,
    }
+5 −2
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ def should_crawl_page(htmltree: BeautifulSoup) -> bool:

def create_posts(base_dir):
    paths = find_post_paths(base_dir)
    posts = []
    for path in paths:

        htmltree = get_htmltree_from_file(path)
@@ -96,11 +97,13 @@ def create_posts(base_dir):
        id = path.replace(base_dir, "").replace("/", "-")
        url = path.replace(base_dir, "")

        yield Post(
        posts.append(Post(
            id=id, 
            title=title, 
            url=url, 
            body=body, 
            description=description,
            lang=lang
        )
        ))
    
    return posts
+7 −6
Original line number Diff line number Diff line
from elasticsearch import Elasticsearch
from time import sleep

index_name = "blog"
doc_type = "post"

def connect_elastic(host='localhost', port=9200):
@@ -18,10 +17,12 @@ def connect_elastic(host='localhost', port=9200):

    raise TimeoutError(f"Could not connect to elasticsearch server at {host}:{port}.")  

def refresh_index(es):
    """Recreate the index named by the module-level ``index_name``.

    If an index with that name already exists it is deleted first, then a
    new index with the same name is created.

    Args:
        es: Client whose ``indices`` attribute provides ``exists``,
            ``delete`` and ``create`` (presumably an Elasticsearch
            client -- confirm against the caller).
    """
    already_present = es.indices.exists(index=index_name)
    if already_present:
        # Drop the old index so the create call below starts from scratch.
        es.indices.delete(index=index_name)
    es.indices.create(index=index_name)
def refresh_index(es, languages):
    """Recreate one index per entry in ``languages``.

    For every name yielded by ``languages``, an existing index with that
    name is deleted and a new index with the same name is then created.

    Args:
        es: Client whose ``indices`` attribute provides ``exists``,
            ``delete`` and ``create`` (presumably an Elasticsearch
            client -- confirm against the caller).
        languages: Iterable of names; each one is used directly as an
            index name.
    """
    for language_index in languages:
        already_present = es.indices.exists(index=language_index)
        if already_present:
            # Drop the old index so the create call below starts from scratch.
            es.indices.delete(index=language_index)
        es.indices.create(index=language_index)

def index_posts(es, posts):
    for post in posts:
@@ -32,5 +33,5 @@ def index_posts(es, posts):
            "lang": post.lang,
        }

        es.index(index=index_name, doc_type=doc_type, id=post.id, body=doc)
        es.index(index=post.lang, doc_type=doc_type, id=post.id, body=doc)
        print("Created doc for " + post.url)
+6 −4
Original line number Diff line number Diff line
import sys
from os.path import expanduser
from find_posts import create_posts
import indexer
from dotenv import load_dotenv
@@ -17,7 +16,7 @@ if __name__ == "__main__":
    print("Finding posts in %s" % base_dir)

    posts = create_posts(base_dir)
    print("Posts created")
    print("Posts created ({})".format(len(posts)))

    es_host = os.getenv('ES_HOST', 'localhost')
    es_port = os.getenv('ES_PORT', 9200)
@@ -27,8 +26,11 @@ if __name__ == "__main__":
    es = indexer.connect_elastic(es_host, es_port)
    print("ElasticSearch connection established")

    indexer.refresh_index(es)
    print("Current blog index removed")

    unique_languages = set([post.lang for post in posts ])

    indexer.refresh_index(es, unique_languages)
    print("Current indexes updated")

    indexer.index_posts(es, posts)
    print("Finished indexing posts")
+9 −3
Original line number Diff line number Diff line
def search_query(es, user_query):
from elasticsearch.client import Elasticsearch


def search_query(es: Elasticsearch, user_query: str, language: str):

  if not es.indices.exists(language):
    return []

  query = {
    "query": {
@@ -7,7 +13,7 @@ def search_query(es, user_query):
        "type": "best_fields",
        "fuzziness": "AUTO",
        "tie_breaker": 0.3,
        "fields": ["title^3", "body"]
        "fields": ["title^3", "body"],
      }
    },
    "highlight": {
@@ -18,7 +24,7 @@ def search_query(es, user_query):
    "_source": ["title", "url", "body", "lang"]
  }

  res = es.search(index="blog", body=query)
  res = es.search(index=language, body=query)
  results = []
  for h in res['hits']['hits']:
    results.append(h['_source'])