Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 360f8fab authored by Adam Tauber, committed by GitHub
Browse files

Merge pull request #1186 from kvch/fix-bing-videos

Fix Bing videos engine
parents ccc6955f 3ef8533f
Loading
Loading
Loading
Loading
+5 −17
Original line number Diff line number Diff line
@@ -69,22 +69,11 @@ def response(resp):
    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="dg_u"]'):

        # try to extract the url
        url_container = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload')
        if len(url_container) > 0:
            url = loads(url_container[0])['purl']
        else:
            url = result.xpath('./a/@href')[0]

            # discard results that do not return an external url
            # very recent results sometimes don't return the video's url
            if url.startswith('/videos/search?'):
                continue

        title = extract_text(result.xpath('./a//div[@class="tl"]'))
        content = extract_text(result.xpath('.//div[@class="pubInfo"]'))
        thumbnail = result.xpath('.//div[@class="vthumb"]/img/@src')[0]
        url = result.xpath('./div[@class="mc_vtvc"]/a/@href')[0]
        url = 'https://bing.com' + url
        title = extract_text(result.xpath('./div/a/div/div[@class="mc_vtvc_title"]/@title'))
        content = extract_text(result.xpath('./div/a/div/div/div/div/text()'))
        thumbnail = result.xpath('./div/a/div/div/img/@src')[0]

        results.append({'url': url,
                        'title': title,
@@ -92,7 +81,6 @@ def response(resp):
                        'thumbnail': thumbnail,
                        'template': 'videos.html'})

        # first page ignores requested number of results
        if len(results) >= number_of_results:
            break

+33 −66
Original line number Diff line number Diff line
@@ -47,78 +47,45 @@ class TestBingVideosEngine(SearxTestCase):
        self.assertEqual(bing_videos.response(response), [])

        html = """
        <div>
        <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
            <div id="mc_vtvc_1" class="mc_vtvc">
                <a class="mc_vtvc_link" href="/video">
                    <div class="mc_vtvc_th">
                        <div class="cico">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                        <div class="mc_vtvc_ban_lo">
                            <div class="vtbc">
                                <div class="mc_bc_w b_smText">
                                    <div class="mc_bc pivot bpi_2">
                                        <span title="">
                                             <span class="mv_vtvc_play cipg "></span>
                                        </span>
                                    </div>
                                    <div class="mc_bc items">10:06</div>
                                </div>
                            </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                        </div>
                </a>
                <div class="sa_wrapper"
                    data-eventpayload="{&quot;purl&quot;: &quot;https://url.com/1&quot;}">
                </div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = bing_videos.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')

        html = """
        <div>
            <div class="dg_u">
                <a class="dv_i" href="https://url.com/1">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                    </div>
                </a>
            </div>
            <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_2.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 2
                        <div class="mc_vtvc_meta">
                        <div class="mc_vtvc_title" title="Title 1"></div>
                        <div class="mc_vtvc_meta_block_area">
                        <div class="mc_vtvc_meta_block">
                            <div class="mc_vtvc_meta_row">
                                <span>65,696,000+ views</span>
                                <span>1 year ago</span>
                            </div>
                            <div class="mc_vtvc_meta_row mc_vtvc_meta_row_channel">Content 1</div>
                            <div class="mc_vtvc_meta_row"><span>
                                <div class="cico mc_vtvc_src_ico">
                                    <div></div>
                                </div>
                                <span>YouTube</span>
                            </span></div>
                        </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 2</div>
                        </div>
                    </div>
                    <div class="vrhdata"></div>
                    </a>
                </div>
            </div>
@@ -128,6 +95,6 @@ class TestBingVideosEngine(SearxTestCase):
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['url'], 'https://bing.com/video')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')