Loading searx/engines/subtitleseeker.py +8 −7 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ from cgi import escape from urllib import quote_plus from lxml import html from searx.languages import language_codes from searx.engines.xpath import extract_text # engine dependent config categories = ['videos'] Loading Loading @@ -44,7 +45,7 @@ def response(resp): if resp.search_params['language'] != 'all': search_lang = [lc[1] for lc in language_codes if lc[0][:2] == resp.search_params['language']][0] if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] # parse results for result in dom.xpath(results_xpath): Loading @@ -56,17 +57,17 @@ def response(resp): elif search_lang: href = href + search_lang + '/' title = escape(link.xpath(".//text()")[0]) title = escape(extract_text(link)) content = result.xpath('.//div[contains(@class,"red")]//text()')[0] content = extract_text(result.xpath('.//div[contains(@class,"red")]')) content = content + " - " text = result.xpath('.//div[contains(@class,"grey-web")]')[0] content = content + html.tostring(text, method='text') text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0]) content = content + text if result.xpath(".//span") != []: content = content +\ " - (" +\ result.xpath(".//span//text()")[0].strip() +\ extract_text(result.xpath(".//span")) +\ ")" # append result Loading searx/tests/engines/test_subtitleseeker.py 0 → 100644 +169 −0 Original line number Diff line number Diff line from collections import defaultdict import mock from searx.engines import subtitleseeker from searx.testing import SearxTestCase class TestSubtitleseekerEngine(SearxTestCase): def test_request(self): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 params = subtitleseeker.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) self.assertTrue('subtitleseeker.com' in params['url']) def test_response(self): dicto = defaultdict(dict) 
dicto['language'] = 'fr_FR' response = mock.Mock(search_params=dicto) self.assertRaises(AttributeError, subtitleseeker.response, None) self.assertRaises(AttributeError, subtitleseeker.response, []) self.assertRaises(AttributeError, subtitleseeker.response, '') self.assertRaises(AttributeError, subtitleseeker.response, '[]') response = mock.Mock(text='<html></html>', search_params=dicto) self.assertEqual(subtitleseeker.response(response), []) html = """ <div class="boxRows"> <div class="boxRowsInner" style="width:600px;"> <img src="http://static.subtitleseeker.com/images/movie.gif" style="width:16px; height:16px;" class="icon"> <a href="http://this.is.the.url/" class="blue" title="Title subtitle" > This is the Title </a> <br><br> <span class="f10b grey-dark arial" style="padding:0px 0px 5px 20px"> "Alternative Title" </span> </div> <div class="boxRowsInner f12b red" style="width:70px;"> 1998 </div> <div class="boxRowsInner grey-web f12" style="width:120px;"> <img src="http://static.subtitleseeker.com/images/basket_put.png" style="width:16px; height:16px;" class="icon"> 1039 Subs </div> <div class="boxRowsInner grey-web f10" style="width:130px;"> <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" style="width:16px; height:16px;" class="icon"> 1 hours ago </div> <div class="clear"></div> </div> """ response = mock.Mock(text=html, search_params=dicto) results = subtitleseeker.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'This is the Title') self.assertEqual(results[0]['url'], 'http://this.is.the.url/French/') self.assertIn('1998', results[0]['content']) self.assertIn('1039 Subs', results[0]['content']) self.assertIn('Alternative Title', results[0]['content']) html = """ <div class="boxRows"> <div class="boxRowsInner" style="width:600px;"> <img src="http://static.subtitleseeker.com/images/movie.gif" style="width:16px; height:16px;" class="icon"> <a 
href="http://this.is.the.url/" class="blue" title="Title subtitle" > This is the Title </a> </div> <div class="boxRowsInner f12b red" style="width:70px;"> 1998 </div> <div class="boxRowsInner grey-web f12" style="width:120px;"> <img src="http://static.subtitleseeker.com/images/basket_put.png" style="width:16px; height:16px;" class="icon"> 1039 Subs </div> <div class="boxRowsInner grey-web f10" style="width:130px;"> <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" style="width:16px; height:16px;" class="icon"> 1 hours ago </div> <div class="clear"></div> </div> """ dicto['language'] = 'all' response = mock.Mock(text=html, search_params=dicto) results = subtitleseeker.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'This is the Title') self.assertEqual(results[0]['url'], 'http://this.is.the.url/') self.assertIn('1998', results[0]['content']) self.assertIn('1039 Subs', results[0]['content']) html = """ <div class="boxRows"> <div class="boxRowsInner" style="width:600px;"> <img src="http://static.subtitleseeker.com/images/movie.gif" style="width:16px; height:16px;" class="icon"> <a href="http://this.is.the.url/" class="blue" title="Title subtitle" > This is the Title </a> </div> <div class="boxRowsInner f12b red" style="width:70px;"> 1998 </div> <div class="boxRowsInner grey-web f12" style="width:120px;"> <img src="http://static.subtitleseeker.com/images/basket_put.png" style="width:16px; height:16px;" class="icon"> 1039 Subs </div> <div class="boxRowsInner grey-web f10" style="width:130px;"> <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" style="width:16px; height:16px;" class="icon"> 1 hours ago </div> <div class="clear"></div> </div> """ subtitleseeker.language = 'English' response = mock.Mock(text=html, search_params=dicto) results = subtitleseeker.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 
1) self.assertEqual(results[0]['title'], 'This is the Title') self.assertEqual(results[0]['url'], 'http://this.is.the.url/English/') self.assertIn('1998', results[0]['content']) self.assertIn('1039 Subs', results[0]['content']) html = """ <div class="boxRowsInner" style="width:600px;"> <img src="http://static.subtitleseeker.com/images/movie.gif" style="width:16px; height:16px;" class="icon"> <a href="http://this.is.the.url/" class="blue" title="Title subtitle" > This is the Title </a> </div> <div class="boxRowsInner f12b red" style="width:70px;"> 1998 </div> <div class="boxRowsInner grey-web f12" style="width:120px;"> <img src="http://static.subtitleseeker.com/images/basket_put.png" style="width:16px; height:16px;" class="icon"> 1039 Subs </div> <div class="boxRowsInner grey-web f10" style="width:130px;"> <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" style="width:16px; height:16px;" class="icon"> 1 hours ago </div> """ response = mock.Mock(text=html, search_params=dicto) results = subtitleseeker.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) searx/tests/test_engines.py +1 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa from searx.tests.engines.test_www500px import * # noqa Loading Loading
searx/engines/subtitleseeker.py +8 −7 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ from cgi import escape from urllib import quote_plus from lxml import html from searx.languages import language_codes from searx.engines.xpath import extract_text # engine dependent config categories = ['videos'] Loading Loading @@ -44,7 +45,7 @@ def response(resp): if resp.search_params['language'] != 'all': search_lang = [lc[1] for lc in language_codes if lc[0][:2] == resp.search_params['language']][0] if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] # parse results for result in dom.xpath(results_xpath): Loading @@ -56,17 +57,17 @@ def response(resp): elif search_lang: href = href + search_lang + '/' title = escape(link.xpath(".//text()")[0]) title = escape(extract_text(link)) content = result.xpath('.//div[contains(@class,"red")]//text()')[0] content = extract_text(result.xpath('.//div[contains(@class,"red")]')) content = content + " - " text = result.xpath('.//div[contains(@class,"grey-web")]')[0] content = content + html.tostring(text, method='text') text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0]) content = content + text if result.xpath(".//span") != []: content = content +\ " - (" +\ result.xpath(".//span//text()")[0].strip() +\ extract_text(result.xpath(".//span")) +\ ")" # append result Loading
# searx/tests/engines/test_subtitleseeker.py
"""Unit tests for the subtitleseeker engine (request building and result parsing)."""
from collections import defaultdict
import mock
from searx.engines import subtitleseeker
from searx.testing import SearxTestCase


# Optional "alternative title" markup that follows the result link.
_ALT_TITLE_SPAN = """
        <br><br>
        <span class="f10b grey-dark arial" style="padding:0px 0px 5px 20px">
            "Alternative Title"
        </span>"""

# The cells of one result row; ``{alt_title}`` is filled in by _build_html().
_RESULT_CELLS = """
    <div class="boxRowsInner" style="width:600px;">
        <img src="http://static.subtitleseeker.com/images/movie.gif"
            style="width:16px; height:16px;" class="icon">
        <a href="http://this.is.the.url/"
            class="blue" title="Title subtitle" >
            This is the Title
        </a>{alt_title}
    </div>
    <div class="boxRowsInner f12b red" style="width:70px;">
        1998
    </div>
    <div class="boxRowsInner grey-web f12" style="width:120px;">
        <img src="http://static.subtitleseeker.com/images/basket_put.png"
            style="width:16px; height:16px;" class="icon">
        1039 Subs
    </div>
    <div class="boxRowsInner grey-web f10" style="width:130px;">
        <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
            style="width:16px; height:16px;" class="icon">
        1 hours ago
    </div>
"""


def _build_html(alt_title=False, wrapped=True):
    """Return the HTML fixture for one subtitleseeker result row.

    alt_title -- include the "Alternative Title" <span> after the link.
    wrapped   -- enclose the cells in the ``<div class="boxRows">`` container;
                 the tests expect rows without it to yield no result.
    """
    cells = _RESULT_CELLS.format(alt_title=_ALT_TITLE_SPAN if alt_title else '')
    if not wrapped:
        return cells
    return ('\n<div class="boxRows">'
            + cells
            + '    <div class="clear"></div>\n</div>\n')


class TestSubtitleseekerEngine(SearxTestCase):
    """Exercise subtitleseeker.request() and subtitleseeker.response()."""

    def _parse(self, html, search_params):
        """Run the engine's response() on a mocked HTTP response."""
        response = mock.Mock(text=html, search_params=search_params)
        return subtitleseeker.response(response)

    def _assert_single_result(self, results, expected_url):
        """Check the fields shared by every one-row fixture."""
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'This is the Title')
        self.assertEqual(results[0]['url'], expected_url)
        self.assertIn('1998', results[0]['content'])
        self.assertIn('1039 Subs', results[0]['content'])

    def test_request(self):
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        params = subtitleseeker.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('subtitleseeker.com', params['url'])

    def test_response(self):
        dicto = defaultdict(dict)
        dicto['language'] = 'fr_FR'

        # Anything that is not a response object must fail loudly.
        for bad_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError,
                              subtitleseeker.response, bad_response)

        # A page without result rows yields an empty list.
        self.assertEqual(self._parse('<html></html>', dicto), [])

        # A locale-style code ('fr_FR') is expected to resolve to the
        # 'French/' URL suffix, and the alternative title to reach the content.
        results = self._parse(_build_html(alt_title=True), dicto)
        self._assert_single_result(results, 'http://this.is.the.url/French/')
        self.assertIn('Alternative Title', results[0]['content'])

        # With language 'all' the URL is left untouched.
        dicto['language'] = 'all'
        results = self._parse(_build_html(), dicto)
        self._assert_single_result(results, 'http://this.is.the.url/')

        # Override the engine-level language only for the remaining checks.
        # patch.object restores the previous value afterwards, so the
        # override cannot leak into other engine tests in the same run.
        with mock.patch.object(subtitleseeker, 'language', 'English',
                               create=True):
            results = self._parse(_build_html(), dicto)
            self._assert_single_result(results,
                                       'http://this.is.the.url/English/')

            # Cells that are not inside a "boxRows" container are ignored.
            results = self._parse(_build_html(wrapped=False), dicto)
            self.assertIsInstance(results, list)
            self.assertEqual(len(results), 0)
searx/tests/test_engines.py +1 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa from searx.tests.engines.test_www500px import * # noqa Loading