# test_google.py
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
import lxml
from searx.engines import google
from searx.testing import SearxTestCase


class TestGoogleEngine(SearxTestCase):
    """Unit tests for the request/response helpers of ``searx.engines.google``."""

    # URL of a regular search results page; used unless a test passes its own.
    default_url = 'https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr'

    def mock_response(self, text, url=None):
        """Build a fake HTTP response object to feed to ``google.response``.

        :param text: body of the response (HTML markup).
        :param url: final URL of the response; defaults to ``default_url``.
        :returns: a ``mock.Mock`` exposing ``text``, ``url`` and a
            ``search_params`` whose ``get`` always returns ``'www.google.com'``
            (presumably the Google hostname used for the request -- confirm
            against the engine).
        """
        response = mock.Mock(text=text, url=self.default_url if url is None else url)
        response.search_params = mock.Mock()
        response.search_params.get = mock.Mock(return_value='www.google.com')
        return response

    def test_request(self):
        """The request URL and Accept-Language header follow the requested language."""
        # NOTE: these assignments overwrite module-level state of the engine.
        google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
        google.language_aliases = {'he': 'iw'}

        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['time_range'] = ''

        cases = (
            # (requested language, expected domain, expected language code)
            ('fr-FR', 'google.fr', 'fr'),
            ('en-US', 'google.com', 'en'),
            # a bare 'zh' is expected to resolve to the supported 'zh-CN'
            ('zh', 'google.com', 'zh-CN'),
            # 'he' is expected to be rewritten through language_aliases
            ('he', 'google.com', 'iw'),
        )
        for language, domain, lang_code in cases:
            # The same params dict is reused for every request, as a caller
            # issuing successive searches would.
            dicto['language'] = language
            params = google.request(query, dicto)
            self.assertIn('url', params)
            self.assertIn(query, params['url'])
            self.assertIn(domain, params['url'])
            self.assertIn(lang_code, params['url'])
            self.assertIn(lang_code, params['headers']['Accept-Language'])

    def test_response(self):
        """``google.response`` parses result pages and rejects invalid input."""
        # Anything that is not a response object must raise AttributeError.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, '')
        self.assertRaises(AttributeError, google.response, '[]')

        response = self.mock_response('<html></html>')
        self.assertEqual(google.response(response), [])

        # Five "g" entries plus one suggestion paragraph.  Only the first
        # entry and the suggestion are expected in the output (see the
        # assertions below); the other four entries must produce no result.
        html = """
        <div class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                        .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup without any "g" entry must yield an empty result list.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)

        # A response whose final URL is one of Google's "sorry" pages must
        # raise RuntimeWarning, whatever the body contains.
        sorry_urls = (
            'https://sorry.google.com',
            'https://www.google.com/sorry/IndexRedirect',
        )
        for sorry_url in sorry_urls:
            response = self.mock_response('<html></html>', url=sorry_url)
            self.assertRaises(RuntimeWarning, google.response, response)

    def test_parse_images(self):
        """``google.parse_images`` turns a linked thumbnail into an image result."""
        # Imported explicitly: a plain ``import lxml`` does not load the
        # ``lxml.html`` submodule, so the attribute access below would only
        # work if another module had already imported it.
        import lxml.html

        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom, 'www.google.com')
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        # The target is expected to be unwrapped from the "/url?q=" redirect.
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')

    def test_fetch_supported_languages(self):
        """``google._fetch_supported_languages`` maps language codes to their names."""
        # A page without any language entry gives an empty mapping.
        html = """<html></html>"""
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertEqual(type(languages), dict)
        self.assertEqual(len(languages), 0)

        # Each <span> id is the language code prefixed with "t"; the span
        # text is the expected display name.
        html = u"""
        <html>
            <body>
                <table>
                    <tbody>
                        <tr>
                            <td>
                                <font>
                                    <label>
                                        <span id="ten">English</span>
                                    </label>
                                </font>
                            </td>
                            <td>
                                <font>
                                    <label>
                                        <span id="tzh-CN">中文 (简体)</span>
                                    </label>
                                    <label>
                                        <span id="tzh-TW">中文 (繁體)</span>
                                    </label>
                                </font>
                            </td>
                        </tr>
                    </tbody>
                </table>
            </body>
        </html>
        """
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertEqual(type(languages), dict)
        self.assertEqual(len(languages), 3)

        expected_names = (
            ('en', 'English'),
            ('zh-CN', u'中文 (简体)'),
            ('zh-TW', u'中文 (繁體)'),
        )
        for code, name in expected_names:
            self.assertIn(code, languages)
            # assertEqual, not the deprecated assertEquals alias (removed in
            # Python 3.12).
            self.assertEqual(type(languages[code]), dict)
            self.assertIn('name', languages[code])
            self.assertEqual(languages[code]['name'], name)