Source code for pip_rating.sources.sourcerank

import datetime
import time
from functools import cached_property
from typing import Iterator, Tuple, TypedDict, TYPE_CHECKING

import requests
from bs4 import BeautifulSoup
from requests import RequestException

from pip_rating.sources.base import SourceBase

if TYPE_CHECKING:
    from pip_rating.packages import Package

SOURCERANK_URL = "https://libraries.io/pypi/{package_name}/sourcerank"
BREAKDOWN_MAPPING = {
    "Basic info present?": "basic_info_present",
    "Source repository present?": "source_repository_present",
    "Readme present?": "readme_present",
    "License present?": "license_present",
    "Has multiple versions?": "has_multiple_versions",
    "Follows SemVer?": "follows_semver",
    "Recent release?": "recent_release",
    "Not brand new?": "not_brand_new",
    "1.0.0 or greater?": "is_1_or_greater",
    "Dependent Packages": "dependent_projects",
    "Dependent Repositories": "dependent_repositories",
    "Stars": "stars",
    "Contributors": "contributors",
    "Libraries.io subscribers": "librariesio_subscribers",
    "Total": "total",
}
RETRY_WAIT = 30


[docs]class SourceRankBreakdown(TypedDict): basic_info_present: int source_repository_present: int readme_present: int license_present: int has_multiple_versions: int follows_semver: int recent_release: int not_brand_new: int is_1_or_greater: int dependent_projects: int dependent_repositories: int stars: int contributors: int librariesio_subscribers: int total: int
[docs]class SourceRankCacheDict(TypedDict): package_name: str updated_at: str breakdown: SourceRankBreakdown
[docs]class SourceRank(SourceBase): source_name = "sourcerank" def __init__(self, package: "Package"): self.package = package super().__init__(package.name)
[docs] def get_cache_data(self) -> dict: return { "package_name": self.package_name, "updated_at": datetime.datetime.now().isoformat(), "breakdown": dict(self.get_breakdown()), }
@cached_property def breakdown(self) -> SourceRankBreakdown: if not self.is_cache_expired: cache = self.get_from_cache() else: cache = self.save_to_cache() return cache["breakdown"]
[docs] def request(self) -> bytes: """Request the sourcerank page and return the content""" with requests.get( SOURCERANK_URL.format(package_name=self.package.real_name) ) as response: try: response.raise_for_status() except RequestException as e: if e.response is not None and e.response.status_code == 429: self.package.dependencies.results.progress_console.print( f"Reached request limit for Sourcerank, waiting {RETRY_WAIT} seconds" ) time.sleep(RETRY_WAIT) return self.request() return response.content
[docs] def get_breakdown(self) -> Iterator[Tuple[str, int]]: soup = BeautifulSoup(self.request(), "html.parser") for item in soup.find_all("li", "list-group-item"): stripped_strings = list(item.stripped_strings) if stripped_strings[1] in BREAKDOWN_MAPPING: yield BREAKDOWN_MAPPING[stripped_strings[1]], int(stripped_strings[0])