diff --git a/README.md b/README.md index 69b436c..d143fe3 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ tailwindcss -i styles/globals.css -o static/globals.css --watch --m 3. Start browser-sync. This hot reloads the website when the html template files are modified: ```sh -browser-sync http://localhost:8000 --files templates/** +browser-sync http://localhost:8000 --files templates/** --files app/** ``` **NOTE**: Website has to be visited at http://localhost:3000 instead. diff --git a/app/util/prowlarr.py b/app/util/prowlarr.py index d0a3e22..970a4b0 100644 --- a/app/util/prowlarr.py +++ b/app/util/prowlarr.py @@ -133,7 +133,7 @@ async def query_prowlarr( params: dict[str, Any] = { "query": query, - "categories": 3000, + "categories": 3030, # Audio/Audiobook "type": "search", "limit": 100, "offset": 0, diff --git a/app/util/ranking/download_ranking.py b/app/util/ranking/download_ranking.py index 6a2633e..6318644 100644 --- a/app/util/ranking/download_ranking.py +++ b/app/util/ranking/download_ranking.py @@ -1,11 +1,15 @@ import asyncio -from aiohttp import ClientSession +from functools import cmp_to_key +from typing import Callable + import pydantic +from aiohttp import ClientSession +from rapidfuzz import fuzz, utils from sqlmodel import Session + from app.models import BookRequest, ProwlarrSource from app.util.ranking.quality import QualityProfile from app.util.ranking.quality_extract import Quality, extract_qualities -from functools import cmp_to_key class RankSource(pydantic.BaseModel): @@ -30,7 +34,6 @@ async def rank_sources( rank_sources = [x for y in await asyncio.gather(*coros) for x in y] compare = CompareSource(quality_profile, book) - # TODO: check if the ordering is working as expected rank_sources.sort(key=cmp_to_key(compare)) return [rs.source for rs in rank_sources] @@ -40,13 +43,33 @@ class CompareSource: def __init__(self, quality_profile: QualityProfile, book: BookRequest): self.book = book self.quality_profile = quality_profile + self.compare_order = [ + self._compare_valid, + self._compare_title, + self._compare_authors, + self._compare_narrators, + self._compare_format, + self._compare_flags, + self._compare_indexer, + self._compare_subtitle, + self._compare_seeders, + ] def __call__(self, a: RankSource, b: RankSource): return self.compare(a, b) def compare(self, a: RankSource, b: RankSource) -> int: - # TODO: allow customizing of order - return self._compare_quality(a, b) + return self._get_next_compare(0)(a, b, 1) + + def _get_next_compare( + self, index: int + ) -> Callable[[RankSource, RankSource, int], int]: + def default_compare(a: RankSource, b: RankSource, next_compare: int) -> int: + return 0 + + if index < len(self.compare_order): + return self.compare_order[index] + return default_compare def _is_valid_quality(self, a: RankSource) -> bool: match a.quality.file_format: @@ -56,71 +79,115 @@ class CompareSource: quality_range = self.quality_profile.M4B case "mp3": quality_range = self.quality_profile.MP3 + case "unknown-audio": + quality_range = self.quality_profile.UNKNOWN_AUDIO case "unknown": quality_range = self.quality_profile.UNKNOWN return quality_range[0] < a.quality.kbits < quality_range[1] - def _compare_quality(self, a: RankSource, b: RankSource) -> int: - a_valid = self._is_valid_quality(a) - b_valid = self._is_valid_quality(b) - if a_valid and not b_valid: - return -1 - if not a_valid and b_valid: - return 1 - return self._compare_format(a, b) + def _compare_valid(self, a: RankSource, b: RankSource, next_compare: int) -> int: + """Filter out any reasons that make it not valid""" + a_valid = self._is_valid_quality(a) and a.source.seeders > 0 + b_valid = self._is_valid_quality(b) and b.source.seeders > 0 + if a_valid == b_valid: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return int(b_valid) - int(a_valid) - def _compare_format(self, a: RankSource, b: RankSource) -> int: + def _compare_format(self, a: RankSource, b: RankSource, next_compare: int) -> int: if a.quality.file_format == b.quality.file_format: - return self._compare_flags(a, b) - a_index = self.quality_profile.get_quality_rank(a.quality.file_format) - b_index = self.quality_profile.get_quality_rank(b.quality.file_format) + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + a_index = self.quality_profile.calculate_quality_rank(a.quality.file_format) + b_index = self.quality_profile.calculate_quality_rank(b.quality.file_format) return a_index - b_index - def _compare_flags(self, a: RankSource, b: RankSource) -> int: + def _compare_flags(self, a: RankSource, b: RankSource, next_compare: int) -> int: a_score = sum( points - for flag, points in self.quality_profile.flags + for flag, points in self.quality_profile.indexer_flags if flag.lower() in a.source.indexer_flags ) b_score = sum( points - for flag, points in self.quality_profile.flags + for flag, points in self.quality_profile.indexer_flags if flag.lower() in b.source.indexer_flags ) if a_score == b_score: - return self._compare_indexer(a, b) - return a_score - b_score - - def _compare_indexer(self, a: RankSource, b: RankSource) -> int: - a_index = self.quality_profile.get_indexer_rank(a.source.indexer_id) - b_index = self.quality_profile.get_indexer_rank(b.source.indexer_id) - if a_index == b_index: - return self._compare_by_name(a, b) - return a_index - b_index - - def _compare_by_name(self, a: RankSource, b: RankSource) -> int: - a_score = add_scores(self.book, a) - b_score = add_scores(self.book, b) - if a_score == b_score: - return self._compare_seeders(a, b) + return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score - def _compare_seeders(self, a: RankSource, b: RankSource) -> int: + def _compare_indexer(self, a: RankSource, b: RankSource, next_compare: int) -> int: + a_index = self.quality_profile.calculate_indexer_rank(a.source.indexer_id) + b_index = self.quality_profile.calculate_indexer_rank(b.source.indexer_id) + if a_index == b_index: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return a_index - b_index + + def _compare_title(self, a: RankSource, b: RankSource, next_compare: int) -> int: + a_title = exists_in_title( + self.book.title, a.source.title, self.quality_profile.title_exists_ratio + ) + b_title = exists_in_title( + self.book.title, b.source.title, self.quality_profile.title_exists_ratio + ) + if a_title == b_title: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return int(b_title) - int(a_title) + + def _compare_subtitle(self, a: RankSource, b: RankSource, next_compare: int) -> int: + if not self.book.subtitle: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + a_title = exists_in_title( + self.book.subtitle, a.source.title, self.quality_profile.title_exists_ratio + ) + b_title = exists_in_title( + self.book.subtitle, b.source.title, self.quality_profile.title_exists_ratio + ) + if a_title == b_title: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return int(b_title) - int(a_title) + + def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int: + a_score = vaguely_exist_in_title( + self.book.authors, a.source.title, self.quality_profile.name_exists_ratio + ) + b_score = vaguely_exist_in_title( + self.book.authors, b.source.title, self.quality_profile.name_exists_ratio + ) + if a_score == b_score: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return b_score - a_score + + def _compare_narrators( + self, a: RankSource, b: RankSource, next_compare: int + ) -> int: + a_score = vaguely_exist_in_title( + self.book.narrators, a.source.title, self.quality_profile.name_exists_ratio + ) + b_score = vaguely_exist_in_title( + self.book.narrators, b.source.title, self.quality_profile.name_exists_ratio + ) + if a_score == b_score: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) + return b_score - a_score + + def _compare_seeders(self, a: RankSource, b: RankSource, next_compare: int) -> int: + if a.source.seeders == b.source.seeders: + return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b.source.seeders - a.source.seeders -def add_scores(book: BookRequest, a: RankSource) -> int: - score = 0 - if book.title not in a.source.title: - score -= 100 +def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) -> int: + return sum( + 1 + for w in words + if fuzz.token_set_ratio(w, title, processor=utils.default_process) + > name_exists_ratio + ) - for author in book.authors: - if author in a.source.title: - score += 10 - for narrator in book.narrators: - if narrator not in book.authors and narrator in a.source.title: - score += 20 - - return score +def exists_in_title(word: str, title: str, title_exists_ratio: int) -> bool: + return ( + fuzz.partial_ratio(word, title, processor=utils.default_process) + > title_exists_ratio + ) diff --git a/app/util/ranking/quality.py b/app/util/ranking/quality.py index 46cbf19..7253d06 100644 --- a/app/util/ranking/quality.py +++ b/app/util/ranking/quality.py @@ -1,31 +1,32 @@ -import math from typing import Literal import pydantic -FileFormat = Literal["flac", "m4b", "mp3", "unknown"] + +FileFormat = Literal["flac", "m4b", "mp3", "unknown-audio", "unknown"] class QualityProfile(pydantic.BaseModel): - FLAC: tuple[float, float] = (0, math.inf) - M4B: tuple[float, float] = (0, math.inf) - MP3: tuple[float, float] = (0, math.inf) - UNKNOWN: tuple[float, float] = (0, math.inf) + FLAC: tuple[float, float] = (20.0, 400.0) + M4B: tuple[float, float] = (20.0, 400.0) + MP3: tuple[float, float] = (20.0, 400.0) + UNKNOWN_AUDIO: tuple[float, float] = (20.0, 400.0) + UNKNOWN: tuple[float, float] = (20.0, 400.0) - flags: list[tuple[str, int]] = [] - - format_order: list[FileFormat] = ["flac", "m4b", "mp3", "unknown"] + indexer_flags: list[tuple[str, int]] = [] + format_order: list[FileFormat] = ["flac", "m4b", "mp3", "unknown-audio", "unknown"] """Order of file formats from highest to lowest quality""" - indexer_order: list[int] = [] """Order of indexers from highest to lowest quality""" + name_exists_ratio: int = 75 + title_exists_ratio: int = 90 - def get_quality_rank(self, file_format: FileFormat) -> int: + def calculate_quality_rank(self, file_format: FileFormat) -> int: try: return self.format_order.index(file_format) except ValueError: return len(self.format_order) - def get_indexer_rank(self, indexer_id: int) -> int: + def calculate_indexer_rank(self, indexer_id: int) -> int: try: return self.indexer_order.index(indexer_id) except ValueError: diff --git a/app/util/ranking/quality_extract.py b/app/util/ranking/quality_extract.py index f882605..705f9ff 100644 --- a/app/util/ranking/quality_extract.py +++ b/app/util/ranking/quality_extract.py @@ -12,6 +12,10 @@ import os from app.models import BookRequest, ProwlarrSource from app.util.ranking.quality import FileFormat +# HACK: Disabled because it doesn't work well with ratelimiting +# We instead completely rely on the title and size of the complete torrent +ENABLE_TORRENT_INSPECTION = False + class Quality(pydantic.BaseModel): kbits: float @@ -77,9 +81,11 @@ async def extract_qualities( raise ValueError("Prowlarr API key not set") book_seconds = book.runtime_length_min * 60 + if book_seconds == 0: + return [] data = None - if source.download_url: + if source.download_url and ENABLE_TORRENT_INSPECTION: try: for _ in range(3): async with client_session.get( @@ -93,45 +99,63 @@ async def extract_qualities( else: return [] except aiohttp.NonHttpUrlRedirectClientError as e: - print(e.args) # tuple. first element is a magnet link - return [] + source.magnet_url = e.args[0] + source.download_url = None - if not data: - return [] - # TODO: correctly fix wrong torrent parsing - parsed = tp.decode(data, hash_fields={"pieces": (1, False)}) - actual_sizes: dict[FileFormat, int] = defaultdict(int) - file_formats = set() - if "info" not in parsed or "files" not in parsed["info"]: - return [] - for f in parsed["info"]["files"]: - size: int = f["length"] - path: str = f["path"][-1] - _, ext = os.path.splitext(path) - ext = ext.lower() - if ext == ".flac": - file_formats.add("flac") - actual_sizes["flac"] += size - elif ext == ".m4b": - file_formats.add("m4b") - actual_sizes["m4b"] += size - elif ext == ".mp3": - file_formats.add("mp3") - actual_sizes["mp3"] += size - elif ext in audio_file_formats: - file_formats.add("unknown") - actual_sizes["unknown"] += size - - qualities = [] - for k, v in actual_sizes.items(): - qualities.append( - Quality( - kbits=v / (book_seconds * 60) / 1024 * 8, - file_format=k, - ) - ) - return qualities + if data: + return get_torrent_info(data, book_seconds) # TODO: use the magnet url to fetch the file information - return [] + file_format: FileFormat = "unknown" + if "mp3" in source.title.lower(): + file_format = "mp3" + elif "flac" in source.title.lower(): + file_format = "flac" + elif "m4b" in source.title.lower(): + file_format = "m4b" + elif "audiobook" in source.title.lower(): + file_format = "unknown-audio" + + return [ + Quality(kbits=8 * source.size / book_seconds / 1000, file_format=file_format) + ] + + +def get_torrent_info(data: bytes, book_seconds: int) -> list[Quality]: + try: + # TODO: correctly fix wrong torrent parsing + parsed = tp.decode(data, hash_fields={"pieces": (1, False)}) + except tp.InvalidTorrentDataException: + return [] + actual_sizes: dict[FileFormat, int] = defaultdict(int) + file_formats = set() + if "info" not in parsed or "files" not in parsed["info"]: + return [] + for f in parsed["info"]["files"]: + size: int = f["length"] + path: str = f["path"][-1] + _, ext = os.path.splitext(path) + ext = ext.lower() + if ext == ".flac": + file_formats.add("flac") + actual_sizes["flac"] += size + elif ext == ".m4b": + file_formats.add("m4b") + actual_sizes["m4b"] += size + elif ext == ".mp3": + file_formats.add("mp3") + actual_sizes["mp3"] += size + elif ext in audio_file_formats: + file_formats.add("unknown") + actual_sizes["unknown"] += size + + qualities = [] + for k, v in actual_sizes.items(): + qualities.append( + Quality( + kbits=8 * v / book_seconds / 1000, + file_format=k, + ) + ) + return qualities diff --git a/requirements.txt b/requirements.txt index 8201011..7b22665 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,6 +45,7 @@ pytailwindcss==0.2.0 python-dotenv==1.0.1 python-multipart==0.0.20 PyYAML==6.0.2 +RapidFuzz==3.12.1 rich==13.9.4 rich-toolkit==0.13.2 shellingham==1.5.4 diff --git a/templates/sources.html b/templates/sources.html index 6712176..065569d 100644 --- a/templates/sources.html +++ b/templates/sources.html @@ -38,6 +38,7 @@ title indexer + flags seed / leech size (MB) publish date @@ -55,7 +56,7 @@ {{ source.title }} {{ indexers[source.indexer_id].name }} - + {{ source.indexer_flags|join(', ') }} {{ source.seeders }} / {{ source.leechers }} {{ source.size_MB }} {{ source.publish_date.strftime("%d. %b %Y") }}