"""Tests for dirhunt.crawler (source extracted from the Sphinx-rendered page)."""
import unittest
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import mock_open
from dirhunt import __version__
from dirhunt.tests._compat import Mock, patch
from dirhunt.crawler import Crawler
from dirhunt.crawler_url import CrawlerUrl
from dirhunt.processors import GenericProcessor
from dirhunt.tests.base import CrawlerTestBase
# Fixture: a saved crawler session in the shape written by Crawler.create_report.
# "processed" lists finished results; "processing" lists URLs still pending when
# the session was saved (consumed by the resume test below).
REPORT_DATA = {
    "version": __version__,
    "processed": [
        {
            "crawler_url": {
                "depth": 3,
                "exists": None,
                "flags": ["302", "redirect"],
                "type": "directory",
                "url": {"address": "https://site.com/", "domain": "site.com"},
            },
            "line": "",
            "processor_class": "ProcessRedirect",
            "status_code": 302,
        },
    ],
    "processing": ["https://site.com/path/"],
}
class TestCrawler(CrawlerTestBase, unittest.TestCase):
    """Unit tests for dirhunt.crawler.Crawler.

    ``self.get_crawler()`` and ``self.url`` come from CrawlerTestBase.
    Note that ``@patch`` decorators inject mocks bottom-up: the decorator
    closest to the method supplies the first mock argument.
    """

    def test_print_results(self):
        """A queued processor result is printed without raising."""
        crawler = self.get_crawler()
        crawler_url = CrawlerUrl(crawler, self.url)
        crawler.results.put(GenericProcessor(None, crawler_url))
        crawler.print_results()

    @patch('dirhunt.crawler.json.dump')
    @patch('builtins.open')
    def test_create_report(self, _, mock_dump):
        """create_report serializes session state through json.dump."""
        crawler = self.get_crawler()
        crawler.results.put(GenericProcessor(None, CrawlerUrl(crawler, self.url)))
        crawler.create_report(crawler.get_resume_file())
        mock_dump.assert_called_once()

    @patch('dirhunt.crawler.json.load', return_value=REPORT_DATA)
    @patch('dirhunt.crawler.Crawler.echo', return_value=REPORT_DATA)
    @patch('dirhunt.crawler.Crawler.add_url', return_value=REPORT_DATA)
    @patch('builtins.open')
    def test_resume(self, _, mock_add_url, mock_echo, mock_load):
        """resume() reloads a saved report and re-enqueues pending URLs."""
        crawler = self.get_crawler()
        crawler.resume(crawler.get_resume_file())
        mock_load.assert_called_once()
        mock_echo.assert_called_once()
        # The still-pending URL from the report must be re-added without locking.
        mock_add_url.assert_called_once_with(REPORT_DATA['processing'][0], lock=False)

    def test_print_results_limit(self):
        """Reaching the processed-URL limit flags the crawler for closing."""
        crawler = self.get_crawler(limit=1)
        crawler.current_processed_count = 1
        crawler_url = CrawlerUrl(crawler, self.url)
        crawler.results.put(GenericProcessor(None, crawler_url))
        crawler.print_results()
        self.assertTrue(crawler.closing)

    def test_add_url(self):
        """add_url submits work to the pool (submit patched to stay offline)."""
        crawler = self.get_crawler()
        crawler.domains.add('domain.com')
        crawler_url = CrawlerUrl(crawler, self.url)
        with patch.object(ThreadPoolExecutor, 'submit'):
            crawler.add_url(crawler_url)

    def test_add_init_urls(self):
        """add_init_urls registers the URL's domain and delegates to add_url."""
        crawler = self.get_crawler()
        with patch.object(Crawler, 'add_url') as mock_add_url:
            crawler.add_init_urls(self.url)
            mock_add_url.assert_called_once()
        self.assertEqual(crawler.domains, {'domain.com'})

    def test_erase_tty(self):
        """erase() runs its TTY branch when stdout reports isatty() == True."""
        crawler = self.get_crawler()
        crawler.std = Mock(**{'isatty.return_value': True})
        crawler.erase()

    @patch('dirhunt.crawler.Crawler.create_report')
    @patch('dirhunt.crawler.unregister')
    def test_close(self, mock_unregister, mock_create_report):
        """close(True) writes a report and unregisters the atexit hook."""
        crawler = self.get_crawler()
        crawler.close(True)
        mock_create_report.assert_called_once()
        mock_unregister.assert_called_once()