dirhunt package

Submodules

dirhunt.cli module

dirhunt.cli.random_spinner()[source]
dirhunt.cli.spinner(spinner_list)[source]

dirhunt.crawler module

class dirhunt.crawler.Crawler(max_workers=None, interesting_extensions=None, interesting_files=None, std=None, progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(), not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None, user_agent=None, cookies=None, headers=None)[source]

Bases: ThreadPoolExecutor

add_domain(domain)[source]
add_init_urls(*urls)[source]

Add URLs to the queue.

add_message(body)[source]
add_url(crawler_url, force=False, lock=True)[source]

Add a URL to the queue.

close(create_resume=False)[source]
create_report(to_file)[source]

Write a report of the current json() state to a file. This file can be read later to resume the analysis.

echo(body)[source]
erase()[source]
get_resume_file()[source]
in_domains(domain)[source]
json()[source]
options()[source]
property options_file
print_progress(finished=False)[source]
print_results(exclude=None, include=None)[source]
print_urls_info()[source]
restart()[source]
resume(path)[source]
urls_info = None

dirhunt.exceptions module

exception dirhunt.exceptions.DirHuntError(extra_body='')[source]

Bases: Exception

body = ''
exception dirhunt.exceptions.EmptyError(extra_body='')[source]

Bases: DirHuntError

exception dirhunt.exceptions.IncompatibleVersionError(extra_body='')[source]

Bases: DirHuntError

exception dirhunt.exceptions.RequestError(extra_body='')[source]

Bases: DirHuntError

dirhunt.exceptions.catch(fn)[source]
dirhunt.exceptions.reraise_with_stack(func)[source]

dirhunt.management module

dirhunt.management.comma_separated(ctx, param, value)[source]
dirhunt.management.comma_separated_files(ctx, param, value)[source]
dirhunt.management.eprint(*args, **kwargs)[source]
dirhunt.management.flags_range(flags)[source]
dirhunt.management.key_value(ctx, param, values)[source]
dirhunt.management.latest_release(package)[source]
dirhunt.management.main()[source]
dirhunt.management.print_version(ctx, param, value)[source]
dirhunt.management.status_code_range(start, end)[source]
dirhunt.management.welcome()[source]

dirhunt.processors module

class dirhunt.processors.Error(crawler_url, error)[source]

Bases: ProcessBase

classmethod is_applicable(request, text, crawler_url, soup)[source]
key_name = 'error'
name = 'Error'
process(text, soup=None)[source]
class dirhunt.processors.GenericProcessor(response, crawler_url)[source]

Bases: ProcessBase

key_name = 'generic'
name = 'Generic'
process(text, soup=None)[source]
class dirhunt.processors.Message(error, level='ERROR')[source]

Bases: Error

maybe_directory()[source]
class dirhunt.processors.ProcessBase(response, crawler_url)[source]

Bases: object

add_url(url, depth=3, **kwargs)[source]
property flags
index_file = None
classmethod is_applicable(request, text, crawler_url, soup)[source]
json()[source]
key_name = ''
maybe_directory()[source]
name = ''
process(text, soup=None)[source]
search_index_files()[source]
status_code = 0
url_line()[source]
class dirhunt.processors.ProcessBlankPageRequest(response, crawler_url)[source]

Bases: ProcessHtmlRequest

classmethod is_applicable(response, text, crawler_url, soup)[source]
key_name = 'blank'
name = 'Blank page'
class dirhunt.processors.ProcessCssStyleSheet(response, crawler_url)[source]

Bases: ProcessBase

classmethod is_applicable(response, text, crawler_url, soup)[source]
key_name = 'css'
name = 'CSS StyleSheet'
process(text, soup=None)[source]
class dirhunt.processors.ProcessHtmlRequest(response, crawler_url)[source]

Bases: ProcessBase

analyze_asset(asset)[source]
assets(soup)[source]
classmethod is_applicable(response, text, crawler_url, soup)[source]
key_name = 'html'
name = 'HTML document'
process(text, soup=None)[source]
class dirhunt.processors.ProcessIndexOfRequest(response, crawler_url)[source]

Bases: ProcessHtmlRequest

files = None
property flags
index_titles = ('index of', 'directory listing for')
interesting_ext_files()[source]
interesting_files()[source]
interesting_name_files()[source]
classmethod is_applicable(response, text, crawler_url, soup)[source]
key_name = 'index_of'
name = 'Index Of'
process(text, soup=None)[source]
classmethod repr_file(file)[source]
class dirhunt.processors.ProcessJavaScript(response, crawler_url)[source]

Bases: ProcessBase

classmethod is_applicable(response, text, crawler_url, soup)[source]
key_name = 'js'
name = 'JavaScript'
process(text, soup=None)[source]
class dirhunt.processors.ProcessNotFound(response, crawler_url)[source]

Bases: ProcessBase

property flags
classmethod is_applicable(request, text, crawler_url, soup)[source]
key_name = 'not_found'
name = 'Not Found'
process(text, soup=None)[source]
class dirhunt.processors.ProcessRedirect(response, crawler_url)[source]

Bases: ProcessBase

classmethod is_applicable(request, text, crawler_url, soup)[source]
key_name = 'redirect'
name = 'Redirect'
process(text, soup=None)[source]
redirector = None
dirhunt.processors.get_processor(response, text, crawler_url, soup)[source]

dirhunt.url module

class dirhunt.url.Url(address)[source]

Bases: object

add_extra(data)[source]
breadcrumb()[source]
copy()[source]
property directories
property directory_path
property domain
property domain_port

Domain including the port, if there is one.

property fragment
property full_path
property is_absolute

Whether this is only a path or a full address.

is_ip()[source]
is_valid()[source]
json()[source]
property name
property only_domain

Domain without the port.

parent()[source]
property path
property port
property protocol
property protocol_domain
property query
set_children(children)[source]
property url
property urlparsed
dirhunt.url.full_url_address(address, url)[source]

:rtype: Url

dirhunt.utils module

dirhunt.utils.catch_keyboard_interrupt(fn, restart=None)[source]
dirhunt.utils.catch_keyboard_interrupt_choices(fn, choices, default_choice)[source]
dirhunt.utils.colored(text, *colors)[source]
dirhunt.utils.confirm_choices_close(choices, default_choice)[source]
dirhunt.utils.confirm_close()[source]
dirhunt.utils.flat_list(values)[source]
dirhunt.utils.force_url(url)[source]

Transform domain.com to http://domain.com

Try the most common protocols until you get an answer. Check the destination url in case the server is redirecting the response to invalidate it.

dirhunt.utils.lrange(start, end)[source]
dirhunt.utils.multiplier_arg(argument)[source]
dirhunt.utils.multiplier_args(arguments)[source]
dirhunt.utils.read_file_lines(file)[source]
dirhunt.utils.remove_ansi_escape(text)[source]
dirhunt.utils.value_is_file_path(value)[source]

Module contents