# Source code for dirhunt.tests.test_crawler_url

import unittest

import requests
import requests_mock

from dirhunt.crawler_url import CrawlerUrl
from dirhunt.tests.base import CrawlerTestBase

from dirhunt.tests._compat import patch, Mock


class TestCrawlerUrl(CrawlerTestBase, unittest.TestCase):
    """Exercise ``CrawlerUrl.start()`` on a successful fetch and on request errors."""

    def test_start(self):
        """A fetched URL ends up in ``processed`` and is removed from ``processing``."""
        crawler = self.get_crawler()
        crawler.closing = False
        crawler_url = CrawlerUrl(crawler, self.url)
        # Register the URL as in-flight so the test can check it gets removed.
        crawler.processing[self.url] = crawler_url

        with requests_mock.mock() as req_mock:
            req_mock.get(self.url, headers={'Content-Type': 'text/html'})
            crawler_url.start()

        self.assertIn(self.url, crawler.processed)
        self.assertNotIn(self.url, crawler.processing)
        self.assertEqual(crawler.current_processed_count, 1)

    @requests_mock.mock()
    def test_session_exception(self, req_mock):
        """A ``ConnectTimeout`` raised by the request itself is handled by ``start()``.

        ``start()`` must return the ``CrawlerUrl`` instance, the processed
        counter must equal 1 and ``close()`` must be called exactly once.
        """
        req_mock.get(self.url, exc=requests.exceptions.ConnectTimeout)
        crawler = self.get_crawler()

        with patch('dirhunt.crawler_url.CrawlerUrl.close') as close_mock:
            crawler_url = CrawlerUrl(crawler, self.url)
            self.assertEqual(crawler_url.start(), crawler_url)
            self.assertEqual(crawler.current_processed_count, 1)
            close_mock.assert_called_once()

    def test_session_read_exception(self):
        """A ``ConnectTimeout`` raised while reading the body is handled by ``start()``.

        The session is replaced by a mock whose ``get()`` result acts as a
        context manager yielding a response with status 200 whose
        ``raw.read()`` raises. The expectations are the same as in
        ``test_session_exception``.
        """
        crawler = self.get_crawler()
        crawler.sessions = Mock()
        # Object returned by ``session.get(...)``; it is used as a context manager.
        response_cm = crawler.sessions.get_session.return_value.get.return_value
        response_cm.__enter__ = Mock(**{
            'return_value.status_code': 200,
            'return_value.raw.read.side_effect': requests.exceptions.ConnectTimeout(),
        })
        # ``__exit__`` must return a falsy value: a bare ``Mock()`` returns a
        # truthy Mock, which makes the ``with`` statement suppress any
        # exception leaving the block and could hide an unhandled error.
        response_cm.__exit__ = Mock(return_value=False)

        with patch('dirhunt.crawler_url.CrawlerUrl.close') as close_mock:
            crawler_url = CrawlerUrl(crawler, self.url)
            self.assertEqual(crawler_url.start(), crawler_url)
            self.assertEqual(crawler.current_processed_count, 1)
            close_mock.assert_called_once()