bing.py 2.63 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
"""
 Bing (Web)

 @website     https://www.bing.com
 @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
              max. 5000 queries/month

 @using-api   no (because of query limit)
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        publishedDate
"""
15

Gabor Nagy's avatar
Gabor Nagy committed
16
from lxml import html
Cqoicebordel's avatar
Cqoicebordel committed
17
from searx.engines.xpath import extract_text
Adam Tauber's avatar
Adam Tauber committed
18
from searx.url_utils import urlencode
asciimoo's avatar
asciimoo committed
19

20
21
# engine dependent config
# search categories this engine is listed under; not referenced by the
# functions in this module (presumably read by the engine loader - confirm)
categories = ['general']
asciimoo's avatar
asciimoo committed
22
# paging is available: request() converts params['pageno'] into the
# `first` offset of the search URL
paging = True
asciimoo's avatar
asciimoo committed
23
# language filtering is available: request() prefixes the query with a
# `language:XX` term derived from params['language']
language_support = True
24
# Bing settings page; presumably the page whose response is handed to
# _fetch_supported_languages() - confirm against the engine loader
supported_languages_url = 'https://www.bing.com/account/general'
asciimoo's avatar
asciimoo committed
25

26
27
28
# search-url
# request() concatenates base_url and the formatted search_string:
#   {query}  - the urlencoded `q=...` pair
#   {offset} - index of the first result of the requested page
base_url = 'https://www.bing.com/'
search_string = 'search?{query}&first={offset}'
asciimoo's avatar
asciimoo committed
29

Thomas Pointhuber's avatar
Thomas Pointhuber committed
30

31
# do search-request
asciimoo's avatar
asciimoo committed
32
def request(query, params):
    """Fill in ``params['url']`` with the Bing search URL for *query*.

    :param query:  UTF-8 encoded search terms (decoded here before formatting)
    :param params: request parameters; ``pageno`` and ``language`` are read,
                   ``url`` is written
    :returns:      the same *params* dict, with ``url`` set
    """
    # pages advance in steps of ten and the first result has index 1
    first_result = 10 * (params['pageno'] - 1) + 1

    # only the primary subtag of the locale is used (e.g. 'en-US' -> 'EN')
    language_code = params['language'].split('-')[0].upper()

    # restrict the search language with a `language:XX` term in front of
    # the user's query, then hand urlencode() UTF-8 bytes again
    prefixed_query = u'language:{} {}'.format(language_code, query.decode('utf-8'))
    encoded_query = urlencode({'q': prefixed_query.encode('utf-8')})

    params['url'] = base_url + search_string.format(query=encoded_query,
                                                    offset=first_result)
    return params


47
# get response from search-request
asciimoo's avatar
asciimoo committed
48
49
def _parse_results(dom, container_xpath, link_xpath):
    """Collect url/title/content dicts from every node matching *container_xpath*.

    :param dom:             parsed lxml document of the result page
    :param container_xpath: XPath selecting one node per search result
    :param link_xpath:      XPath, relative to a container, selecting its
                            title link
    :returns:               list of ``{'url', 'title', 'content'}`` dicts
    """
    parsed = []
    for result in dom.xpath(container_xpath):
        links = result.xpath(link_xpath)
        if not links:
            # a container without a title link has neither url nor title;
            # skip it instead of failing the whole response with IndexError
            continue

        link = links[0]
        parsed.append({'url': link.attrib.get('href'),
                       'title': extract_text(link),
                       'content': extract_text(result.xpath('.//p'))})
    return parsed


def response(resp):
    """Parse a Bing result page into a list of result dicts.

    :param resp: HTTP response object; only ``resp.text`` is read
    :returns:    list that optionally starts with a
                 ``{'number_of_results': int}`` entry, followed by one
                 ``{'url', 'title', 'content'}`` dict per parsed result
    """
    results = []

    dom = html.fromstring(resp.text)

    # the result counter is best effort: it may be absent (IndexError) or
    # not start with a parsable number (ValueError); anything else should
    # surface instead of being silently swallowed
    try:
        count_text = dom.xpath('//span[@class="sb_count"]/text()')[0]
        results.append({'number_of_results': int(count_text.split()[0].replace(',', ''))})
    except (IndexError, ValueError):
        pass

    # parse results; both page layouts are always scanned, in this order
    results.extend(_parse_results(dom, '//div[@class="sa_cc"]', './/h3/a'))
    results.extend(_parse_results(dom, '//li[@class="b_algo"]', './/h2/a'))

    # return results
    return results
85
86
87


# get supported languages from their site
88
def _fetch_supported_languages(resp):
    """Return the language codes listed on the page in ``resp.text``.

    Codes are taken from the ``id`` attribute of every ``<input>`` inside
    the ``limit-languages`` div, with ``_`` replaced by ``-``; the code
    ``nb`` is reported as ``no``.

    :param resp: HTTP response object; only ``resp.text`` is read
    :returns:    list of language code strings, in document order
    """
    tree = html.fromstring(resp.text)

    codes = []
    for node in tree.xpath('//div[@id="limit-languages"]//input'):
        lang_code = node.xpath('./@id')[0].replace('_', '-')
        codes.append('no' if lang_code == 'nb' else lang_code)

    return codes