Source code for dirhunt.management

# -*- coding: utf-8 -*-

from __future__ import print_function

import re
import click
import os

import sys

from click import BadOptionUsage, Path, BadParameter

from dirhunt.crawler import Crawler
from dirhunt.exceptions import DirHuntError, catch, IncompatibleVersionError
from dirhunt.output import output_urls
from dirhunt.sources import SOURCE_CLASSES, get_source_name
from dirhunt.utils import lrange, catch_keyboard_interrupt, force_url, read_file_lines, value_is_file_path, flat_list, \
    multiplier_args, catch_keyboard_interrupt_choices
from colorama import init

init(autoreset=True)

STATUS_CODES = lrange(100, 102 + 1) + lrange(200, 208 + 1) + [226] + lrange(300, 308 + 1) + lrange(400, 418 + 1) + \
               lrange(421, 426 + 1) + [428, 429, 431, 451] + lrange(500, 511 + 1)
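# The STATUS_CODES list above enumerates the known HTTP status codes (assuming lrange(start, stop)
# from dirhunt.utils behaves like list(range(start, stop)), so each "+ 1" makes the upper bound
# inclusive). flags_range() below uses it to expand flag ranges such as "400-404".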
INTERESTING_EXTS = ['php', 'zip', 'sh', 'asp', 'csv', 'log']
INTERESTING_FILES = ['access_log', 'error_log', 'error', 'logs', 'dump']
STDOUT_FLAGS = ['blank', 'not_found.fake', 'html']


def latest_release(package):
    if sys.version_info > (3,):
        from xmlrpc import client
    else:
        import xmlrpclib as client
    pypi = client.ServerProxy('https://pypi.python.org/pypi')
    available = pypi.package_releases(package)
    if not available:
        # Try to capitalize pkg name
        available = pypi.package_releases(package.capitalize())
    if not available:
        return
    return available[0]
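
# Example (assumed behaviour): latest_release('dirhunt') asks PyPI's XML-RPC API for the release
# list and returns the newest version string, or None when the package name is unknown.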


def comma_separated(ctx, param, value):
    return value.split(',') if value else []


def comma_separated_files(ctx, param, value):
    values = comma_separated(ctx, param, value)
    items = []
    for value in values:
        if value_is_file_path(value):
            items += read_file_lines(value)
        else:
            items.append(value)
    return items
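
# Illustrative use as a click callback: comma_separated_files(None, None, 'php,./exts.txt') returns
# ['php'] plus one item per line of ./exts.txt, assuming value_is_file_path() recognises the path and
# read_file_lines() yields the file's lines (both are defined in dirhunt.utils).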


def key_value(ctx, param, values):
    items = [item.split(':', 1) for item in values]
    if any(filter(lambda x: len(x) < 2, items)):
        raise BadParameter('Expect a value with format key:bar', ctx, param)
    return {x[0].strip(): x[1].strip() for x in items}
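
# Illustrative: key_value(None, None, ('session:abc123', 'lang: en')) -> {'session': 'abc123', 'lang': 'en'}.
# A value without ':' raises BadParameter. Used below by the repeatable -c/--cookie and -h/--header options.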


def status_code_range(start, end):
    return list(filter(lambda x: start <= x <= end, STATUS_CODES))


def eprint(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)


def welcome():
    from dirhunt import __version__
    click.secho('Welcome to Dirhunt v{} using Python {}'.format(__version__, sys.version.split()[0]),
                fg='cyan')


def flags_range(flags):
    for code in tuple(flags):
        match = re.match(r'^(\d{3})-(\d{3})$', code)
        if match:
            flags.remove(code)
            flags += list(map(str, status_code_range(*map(int, match.groups()))))
    return flags
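
# Illustrative: flags_range(['index_of', '300-304']) keeps 'index_of' and expands the range into
# ['300', '301', '302', '303', '304'] via status_code_range(), so flag filters can mix names and code ranges.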


@click.command(help='One or more domain or urls. Load urls from files using the /full/path or ./relative/path.')
@click.argument('urls', nargs=-1, type=force_url)
@click.option('-t', '--threads', type=int, help='Number of threads to use.')
@click.option('-x', '--exclude-flags', callback=comma_separated_files,
              help='Exclude results with these flags. See documentation.')
@click.option('-i', '--include-flags', callback=comma_separated_files,
              help='Only include results with these flags. See documentation.')
@click.option('-e', '--interesting-extensions', callback=comma_separated_files, default=','.join(INTERESTING_EXTS),
              help='The files found with these extensions are interesting')
@click.option('-f', '--interesting-files', callback=comma_separated_files, default=','.join(INTERESTING_FILES),
              help='The files with these names are interesting')
@click.option('--stdout-flags', callback=comma_separated_files, default=','.join(STDOUT_FLAGS),
              help='Return only in stdout the urls of these flags')
@click.option('--progress-enabled/--progress-disabled', default=None)
@click.option('--timeout', default=10)
@click.option('--max-depth', default=3, help='Maximum links to follow without increasing directories depth')
@click.option('--not-follow-subdomains', is_flag=True, help='The subdomains will be ignored')
@click.option('--exclude-sources', callback=comma_separated_files,
              help='Exclude source engines. Possible options: {}'.format(
                  ', '.join([get_source_name(src) for src in SOURCE_CLASSES])))
@click.option('-p', '--proxies', callback=comma_separated_files,
              help='Set one or more proxies to alternate between them')
@click.option('-d', '--delay', default=0, type=float, help='Delay between requests to avoid bans by the server')
@click.option('--not-allow-redirects', is_flag=True, help='Redirectors will not be followed')
@click.option('--limit', type=int, default=1000, help='Max number of pages processed to search for directories.')
@click.option('--to-file', type=Path(writable=True), default=None, help='Create a report file in JSON.')
@click.option('-u', '--user-agent', type=str, default=None, help='User agent to use. By default a random browser.')
@click.option('-c', '--cookie', 'cookies', callback=key_value, multiple=True,
              help='Add a cookie to requests in the cookie_name:value format.')
@click.option('-h', '--header', 'headers', callback=key_value, multiple=True,
              help='Add a header to requests in the header:value format.')
@click.option('--version', is_flag=True, callback=print_version, expose_value=False, is_eager=True)
def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, interesting_files, stdout_flags,
         progress_enabled, timeout, max_depth, not_follow_subdomains, exclude_sources, proxies, delay,
         not_allow_redirects, limit, to_file, user_agent, cookies, headers):
    """Find web directories without bruteforce
    """
    if exclude_flags and include_flags:
        raise BadOptionUsage('--exclude-flags and --include-flags are mutually exclusive.')
    welcome()
    urls = flat_list(urls)
    proxies = multiplier_args(proxies)
    if not urls:
        click.echo('•_•) OOPS! Add urls to analyze.\nFor example: dirhunt http://domain/path\n\n'
                   'Need help? Then use dirhunt --help', err=True)
        return
    exclude_flags, include_flags = flags_range(exclude_flags), flags_range(include_flags)
    progress_enabled = (sys.stdout.isatty() or sys.stderr.isatty()) if progress_enabled is None else progress_enabled
    crawler = Crawler(max_workers=threads, interesting_extensions=interesting_extensions,
                      interesting_files=interesting_files, std=sys.stdout if sys.stdout.isatty() else sys.stderr,
                      progress_enabled=progress_enabled, timeout=timeout, depth=max_depth,
                      not_follow_subdomains=not_follow_subdomains, exclude_sources=exclude_sources,
                      not_allow_redirects=not_allow_redirects, proxies=proxies, delay=delay, limit=limit,
                      to_file=to_file, user_agent=user_agent, cookies=cookies, headers=headers)
    crawler.add_init_urls(*urls)
    if os.path.exists(crawler.get_resume_file()):
        click.echo('Resuming the previous program execution...')
        try:
            crawler.resume(crawler.get_resume_file())
        except IncompatibleVersionError as e:
            click.echo(e)
    while True:
        choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a') \
            (set(exclude_flags), set(include_flags))
        if choice == 'a':
            crawler.close(True)
            click.echo('Created resume file "{}". Run again using the same parameters to resume.'.format(
                crawler.get_resume_file()))
            return
        elif choice == 'c':
            crawler.restart()
            continue
        else:
            break
    crawler.print_urls_info()
    if not sys.stdout.isatty():
        output_urls(crawler, stdout_flags)
    if to_file:
        crawler.create_report(to_file)
    if not to_file and os.path.exists(crawler.get_resume_file()):
        # The resume file exists. Deleting...
        os.remove(crawler.get_resume_file())
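
# Interrupt handling (assumed from the helper names in dirhunt.utils): pressing Ctrl+C while results
# are printing prompts for [a]bort, [c]ontinue or show [r]esults; 'a' closes the crawler and writes a
# resume file so the same command line can pick up later, 'c' restarts the crawl loop.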


def main():
    catch(hunt)()
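
# Console entry point (assumption: the "dirhunt" script maps to dirhunt.management:main); catch()
# from dirhunt.exceptions presumably reports DirHuntError failures instead of raising a traceback.
# Illustrative invocation:
#   dirhunt http://example.com/blog/ -e php,zip -x 401,404 --progress-disabled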