Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 8b278cbf authored by Alexandre Flament's avatar Alexandre Flament Committed by GitHub
Browse files

Merge pull request #2246 from dalf/mod-searx-data

[mod] Add searx.data module
parents e30dc2f0 a9dc54be
Loading
Loading
Loading
Loading

searx/data/__init__.py

0 → 100644
+21 −0
Original line number Diff line number Diff line
import json
from pathlib import Path


# Public names of this module. The list has to be bound to ``__all__`` (not
# ``__init__``) for ``from searx.data import *`` to take it into account, and
# every entry has to match a name that is really defined in this module.
__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']

# Directory that contains this module; data files are resolved against it.
data_dir = Path(__file__).parent


def load(filename):
    """Parse the JSON file ``filename`` found in ``data_dir`` and return the result.

    The file is decoded as UTF-8.
    """
    # the path is converted with str(...) for Python 3.5
    json_path = str(data_dir / filename)
    with open(json_path, encoding='utf-8') as json_file:
        return json.load(json_file)


def bangs_loader():
    """Read ``bangs.json`` through ``load`` and return its parsed content.

    The file is only read when this function is called.
    """
    bangs = load('bangs.json')
    return bangs


# Datasets parsed once, in this order, while the module is being imported.
ENGINES_LANGUAGES, CURRENCIES, USER_AGENTS = [
    load(json_name)
    for json_name in ('engines_languages.json', 'currencies.json', 'useragents.json')
]
+3 −5
Original line number Diff line number Diff line
@@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
import sys
import threading
from os.path import realpath, dirname
from io import open
from babel.localedata import locale_identifiers
from flask_babel import gettext
from operator import itemgetter
from json import loads
from searx import settings
from searx import logger
from searx.data import ENGINES_LANGUAGES
from searx.poolrequests import get
from searx.utils import load_module, match_language, get_engine_from_settings

@@ -38,7 +37,6 @@ engines = {}

categories = {'general': []}

languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]

@@ -108,8 +106,8 @@ def load_engine(engine_data):
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in languages:
        setattr(engine, 'supported_languages', languages[engine_data['name']])
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
+7 −21
Original line number Diff line number Diff line
import json
import re
import os
import unicodedata

from io import open
from datetime import datetime

from searx.data import CURRENCIES


categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -13,8 +13,6 @@ weight = 100

parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)

db = 1


def normalize_name(name):
    name = name.lower().replace('-', ' ').rstrip('s')
@@ -23,17 +21,17 @@ def normalize_name(name):


def name_to_iso4217(name):
    """Return the first entry recorded for a currency name.

    ``name`` is normalized with ``normalize_name`` and looked up in
    ``CURRENCIES['names']``. When the normalized name has no entry, the
    normalized name itself is returned.
    """
    name = normalize_name(name)
    # CURRENCIES is only read, so no ``global`` statement is required
    codes = CURRENCIES['names'].get(name, [name])
    return codes[0]


def iso4217_to_name(iso4217, language):
    """Return the name stored for ``iso4217`` in ``language``.

    The lookup goes through ``CURRENCIES['iso4217']``. ``iso4217`` itself is
    returned when either the code or the language has no entry.
    """
    # CURRENCIES is only read, so no ``global`` statement is required
    names_by_language = CURRENCIES['iso4217'].get(iso4217, {})
    return names_by_language.get(language, iso4217)


def request(query, params):
@@ -82,15 +80,3 @@ def response(resp):
    results.append({'answer': answer, 'url': url})

    return results


def load():
    """Parse ``../data/currencies.json`` (relative to this file) into the global ``db``."""
    global db

    current_dir = os.path.dirname(os.path.realpath(__file__))
    # the context manager closes the file handle once the content is parsed
    with open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8') as json_file:
        db = json.load(json_file)


load()
+4 −8
Original line number Diff line number Diff line
import json
from os.path import join

from searx import searx_dir
from searx.data import bangs_loader

# bangs data coming from the following url convert to json with
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
@@ -9,8 +6,7 @@ from searx import searx_dir
# NOTE only use the get_bang_url

bangs_data = {}
with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
    for bang in json.load(json_file)['bang']:
for bang in bangs_loader()['bang']:
    for trigger in bang["triggers"]:
        bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}

+2 −7
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
import os
import sys
import re
import json
import importlib

from numbers import Number
from os.path import splitext, join
from io import open
from random import choice
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse, unquote
@@ -18,6 +15,7 @@ from babel.core import get_global


from searx import settings
from searx.data import USER_AGENTS
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import logger
@@ -31,9 +29,6 @@ blocked_tags = ('script',
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)

useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
                             + "/data/useragents.json", 'r', encoding='utf-8').read())

xpath_cache = dict()
lang_to_lc_cache = dict()

@@ -50,7 +45,7 @@ def gen_useragent(os=None):

    See searx/data/useragents.json
    """
    return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
    return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))


class HTMLTextExtractorException(Exception):