diff --git a/.example.env b/.example.env new file mode 100644 index 0000000000000000000000000000000000000000..c86789f387ca2a1c1e2ea7ae851c7fba2caecebe --- /dev/null +++ b/.example.env @@ -0,0 +1,3 @@ +ALLOW_ORIGINS="*" +ES_HOST="localhost" +ES_PORT=9200 \ No newline at end of file diff --git a/.gitignore b/.gitignore index ba0430d26c996e7f078385407f959c96c271087c..8a23f93d7cf877b58d14f2b3b4999cb33c57e903 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -__pycache__/ \ No newline at end of file +__pycache__/ +.env +venv \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..ece5b10a82c7d853183496e4845d978c5816d869 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,35 @@ +# When using dind, it's wise to use the overlayfs driver for +# improved performance. +variables: + DOCKER_DRIVER: overlay2 +stages: + - build + - deploy +default: + image: docker:20.10 + +# Build stage +.build:docker: + stage: build + tags: + - generic_privileged + services: + - docker:20.10-dind + before_script: + - docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY + script: + - docker build . --pull -t "$CI_REGISTRY_IMAGE:$IMAGE_TAG" -f Dockerfile + - docker push "$CI_REGISTRY_IMAGE:$IMAGE_TAG" + +build:branch: + extends: .build:docker + rules: + - if: '$CI_COMMIT_TAG' + when: never + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + when: never + - if: '$CI_PIPELINE_SOURCE =~ /schedule|web|api|trigger/ && $IMAGE_TAG != $CI_COMMIT_REF_SLUG' + when: never + - when: on_success + variables: + IMAGE_TAG: $CI_COMMIT_REF_SLUG diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2f292652c91f5d2f968f47745016c760b774697a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.9.6-alpine3.13 AS py + +EXPOSE 5000 + +WORKDIR /usr/src/app + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY src . 
+ +CMD [ "flask", "run" ] \ No newline at end of file diff --git a/README.md b/README.md index 58d21f0dc6f2011edf8f3db63cfea570b3d6a615..f5ddc2d2a3511b9c6324a436e66a1a9ce7409817 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,32 @@ Full-text search for your Jekyll blog with ElasticSearch. +## Installation +1. Clone the project +1. run `python3 -m venv venv` to create the virtual env +1. run `. venv/bin/activate` to be able to use the python packages +1. run `pip install -r requirements.txt` to install all packages + +To be able to use the linting inside the virtual environment, it is recommended to use the Python interpreter inside the virtual env folder. Take a look [here](https://code.visualstudio.com/docs/python/environments#_select-and-activate-an-environment) for VS Code instructions + +## Development +1. You need a running copy of elasticsearch: +```bash +docker run \ +--name elasticsearch \ +--rm \ +-p 9200:9200 \ +-p 9300:9300 \ +-e "discovery.type=single-node" \ +elasticsearch:7.13.3 +``` +1. Make sure you are at the virtual env (run `. venv/bin/activate`) +1. To index your content, run: `python3 src/main.py "PATH_TO_YOUR_CONTENT"` +1. run `export FLASK_ENV=development` +1. run `export FLASK_APP=src/app` +1. run `flask run` + + ## Features - Parses the html from your Jekyll `_site` directory using BeautifulSoup to get more accurate content instead of using the raw Markdown. @@ -13,7 +39,7 @@ Full-text search for your Jekyll blog with ElasticSearch. ### Indexing: - Make sure you have an ElasticSearch server running. If not local, change the config in `indexer.py` to reflect your location. - - Run the command `python src/main.py "PATH_TO_YOUR_CONTENT"`, passing the directory that contains your compiled blog (the argument is required). 
- If the library cannot find your content correctly, modify `indexer.py` to point to the correct HTML elements for title, post content etc (assuming you have unique CSS classes for these). ### Searching: diff --git a/find_posts.py b/find_posts.py deleted file mode 100644 index 826f4470c8e2b722bf7ed8d196f36699101bb515..0000000000000000000000000000000000000000 --- a/find_posts.py +++ /dev/null @@ -1,37 +0,0 @@ -import glob -from bs4 import BeautifulSoup -from post import Post - -def find_post_paths(base_dir): - files = glob.glob(base_dir + "/20*/*/*/*.html") - files = [f.replace("\\", "/") for f in files] - return files - -def parse_post(path): - with open(path, encoding="utf8") as f: - contents = f.read() - - soup = BeautifulSoup(contents, 'html.parser') - title = soup.find('h1', { "class" : "post-title" }).text.strip() - - post_elem = soup.find("div", {"class": "post"}) - post_elem.find(attrs={"class": "post-title"}).decompose() - post_elem.find(attrs={"class": "post-date"}).decompose() - - paras = post_elem.find_all(text=True) - - body = " ".join(p.strip() for p in paras).replace(" ", " ").strip() - # remove special characters - - return (title, body) - - raise "Could not read file: " + path - - -def create_posts(base_dir): - paths = find_post_paths(base_dir) - for path in paths: - id = path.replace(base_dir, "").replace("/", "-") - url = path.replace(base_dir, "") - (title, body) = parse_post(path) - yield Post(id, title, url, body) diff --git a/post.py b/post.py deleted file mode 100644 index aaef2b534e7bdbf7a4d2d6dcef451c4c225fd9b9..0000000000000000000000000000000000000000 --- a/post.py +++ /dev/null @@ -1,6 +0,0 @@ -class Post: - def __init__(self, id, title, url, body): - self.id = id - self.title = title - self.url = url - self.body = body \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..265688d21e5a680bbbfc1903c3cf51c025277976 --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,12 @@ +beautifulsoup4==4.9.3 +certifi==2021.5.30 +click==8.0.1 +elasticsearch==7.13.3 +Flask==2.0.1 +itsdangerous==2.0.1 +Jinja2==3.0.1 +MarkupSafe==2.0.1 +python-dotenv==0.18.0 +soupsieve==2.2.1 +urllib3==1.26.6 +Werkzeug==2.0.1 diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000000000000000000000000000000000000..259c08dac0190dbe20bb9133e2a7e9eac11245bf --- /dev/null +++ b/src/app.py @@ -0,0 +1,24 @@ +from flask import Flask, jsonify +from searcher import search_query +from indexer import connect_elastic +from dotenv import load_dotenv +import os + +app = Flask(__name__) + +load_dotenv() + +es_host = os.getenv('ES_HOST', 'localhost') +es_port = os.getenv('ES_PORT', 9200) + +es = connect_elastic(es_host, es_port) + +@app.route('/search/') +def search(query): + result = search_query(es, query) + data = { + 'search_results': result, + } + resp = jsonify(data) + resp.headers['Access-Control-Allow-Origin'] = os.getenv('ALLOW_ORIGINS', '*') + return resp \ No newline at end of file diff --git a/src/find_posts.py b/src/find_posts.py new file mode 100644 index 0000000000000000000000000000000000000000..a3cecdea701cfda5f829ed8bec012727b36a3919 --- /dev/null +++ b/src/find_posts.py @@ -0,0 +1,104 @@ +import glob +from bs4 import BeautifulSoup +from post import Post +import logging + +DEFAULT_LANG = 'en' + +def find_post_paths(base_dir): + files = glob.glob(base_dir + "/**/*.html", recursive=True) + files = [f.replace("\\", "/") for f in files] + return files + +def get_title_from_htmltree(htmltree: BeautifulSoup): + title = htmltree.select_one('[data-elasticsearch-title]') + if title == None: + title = htmltree.find('h1', { "class" : "post-title" }) + if title == None: + return None + return title.text.strip() + +def get_body_from_htmltree(htmltree: BeautifulSoup): + post_elem = htmltree.select_one('[data-elasticsearch-body]') + + if post_elem == None: + post_elem = htmltree.find("div", {"class": "post"}) + if post_elem == 
None: + return None + post_elem.find(attrs={"class": "post-title"}).decompose() + post_elem.find(attrs={"class": "post-date"}).decompose() + + paras = post_elem.find_all(text=True) + + body = " ".join(p.strip() for p in paras).replace(" ", " ").strip() + + return body + +def get_htmltree_from_file(path: str) -> BeautifulSoup: + with open(path, encoding="utf8") as f: + contents = f.read() + return BeautifulSoup(contents, 'html.parser') + +def get_lang_from_htmltree(htmltree: BeautifulSoup) -> str: + html = htmltree.select_one('html') + lang = html.get('lang') + return DEFAULT_LANG if lang == None else lang + +def get_description_from_htmltree(htmltree: BeautifulSoup) -> str: + metatag = htmltree.select_one('meta[name="description"]') + if metatag == None: + return None + description = metatag.get('content') + return description + +def should_crawl_page(htmltree: BeautifulSoup) -> bool: + + metatag = htmltree.select_one('meta[name="robots"]') + if metatag == None: + return True + + metatag_content = metatag.get('content') + if metatag_content == None: + return True + options = metatag_content.split(',') + + if 'noindex' in options: + return False + return True + +def create_posts(base_dir): + paths = find_post_paths(base_dir) + for path in paths: + + htmltree = get_htmltree_from_file(path) + + should = should_crawl_page(htmltree) + if not should: + continue + + title = get_title_from_htmltree(htmltree) + if title == None: + logging.warning(f"No element for title found in '{path}'") + continue + body = get_body_from_htmltree(htmltree) + if body == None: + logging.warning(f"No element for body found in '{path}'") + continue + + description = get_description_from_htmltree(htmltree) + if description == None: + description = body + + lang = get_lang_from_htmltree(htmltree) + + id = path.replace(base_dir, "").replace("/", "-") + url = path.replace(base_dir, "") + + yield Post( + id=id, + title=title, + url=url, + body=body, + description=description, + lang=lang + ) diff 
--git a/indexer.py b/src/indexer.py similarity index 74% rename from indexer.py rename to src/indexer.py index 8fcd94c4522a70347d3039f97dd9b2b0c2b3c4d3..fe4501ed7a5476eb74e1d16737b37971400a2005 100644 --- a/indexer.py +++ b/src/indexer.py @@ -1,12 +1,10 @@ from elasticsearch import Elasticsearch -import os -import re index_name = "blog" doc_type = "post" -def connect_elastic(host="localhost", port=9200): - return Elasticsearch([{'host': 'localhost', 'port': 9200}]) +def connect_elastic(host='localhost', port=9200): + return Elasticsearch([{'host': host, 'port': port}]) def refresh_index(es): if es.indices.exists(index=index_name): @@ -18,7 +16,8 @@ def index_posts(es, posts): doc = { "title": post.title, "url": post.url, - "body": post.body + "body": post.body, + "lang": post.lang, } es.index(index=index_name, doc_type=doc_type, id=post.id, body=doc) diff --git a/main.py b/src/main.py similarity index 57% rename from main.py rename to src/main.py index 992bddfd4f8101f4c84d8fa5e0adcfa749beec6a..c5678000555bc13f8e07de5d3ce6cebc12ab518c 100644 --- a/main.py +++ b/src/main.py @@ -2,20 +2,27 @@ import sys from os.path import expanduser from find_posts import create_posts import indexer +from dotenv import load_dotenv +import os + +load_dotenv() if __name__ == "__main__": # provide blog base directory as arg - if len(sys.argv) > 1: - base_dir = str(sys.argv[1]) - else: - base_dir = expanduser("~") + "/blog/_site" + if len(sys.argv) != 2: + raise BaseException('You must pass the project folder to be crawled, and only it.') + + base_dir = str(sys.argv[1]) print("Finding posts in %s" % base_dir) posts = create_posts(base_dir) print("Posts created") - es = indexer.connect_elastic() + es_host = os.getenv('ES_HOST', 'localhost') + es_port = os.getenv('ES_PORT', 9200) + + es = indexer.connect_elastic(es_host, es_port) print("ElasticSearch connection established") indexer.refresh_index(es) diff --git a/src/post.py b/src/post.py new file mode 100644 index 
0000000000000000000000000000000000000000..3ad4c113517d5f3b27fdc6b9ffcb33ef647950e3 --- /dev/null +++ b/src/post.py @@ -0,0 +1,8 @@ +class Post: + def __init__(self, id, title, url, body, description, lang): + self.id = id + self.title = title + self.url = url + self.body = body + self.description = description + self.lang = lang \ No newline at end of file diff --git a/searcher.py b/src/searcher.py similarity index 56% rename from searcher.py rename to src/searcher.py index 9f4eb0aad02fdf717a33b707bc11ed409021e5fd..881b33ac73bbeee29fbdc79626e41c1f7fb719e4 100644 --- a/searcher.py +++ b/src/searcher.py @@ -1,10 +1,6 @@ -from elasticsearch import Elasticsearch +def search_query(es, user_query): -es = Elasticsearch([{'host': 'localhost', 'port': 9200}]) - -user_query = "python" - -query = { + query = { "query": { "multi_match": { "query": user_query, @@ -19,14 +15,25 @@ query = { "body" : {} } }, - "_source": ["title", "url"] -} + "_source": ["title", "url", "body", "lang"] + } + + res = es.search(index="blog", body=query) + results = [] + for h in res['hits']['hits']: + results.append(h['_source']) + return results -res = es.search(index="blog", body=query) -print("Found %d Hits:" % res['hits']['total']) +if __name__ == "__main__": + import os + from indexer import connect_elastic + from dotenv import load_dotenv + load_dotenv() + es_host = os.getenv('ES_HOST', 'localhost') + es_port = os.getenv('ES_PORT', 9200) -for hit in res['hits']['hits']: - print(hit["_source"]) + es = connect_elastic(es_host, es_port) + print(search_query(es, "map")) # POST /blog/post/_search # {