quality/profiles ranking of torrents

This commit is contained in:
Markbeep
2025-02-21 00:50:13 +01:00
parent eaa410a7a1
commit ae771a2e14
8 changed files with 304 additions and 19 deletions
+3
View File
@@ -135,6 +135,9 @@ class ProwlarrSource(BaseModel):
size: int # in bytes
publish_date: datetime
info_url: str
indexer_flags: list[str]
download_url: Optional[str] = None
magnet_url: Optional[str] = None
download_score: int = 0
-17
View File
@@ -1,17 +0,0 @@
from app.models import ProwlarrSource
MIN_SEEDERS = 5
MIN_SEED_RATIO = 2.0
# TODO: This could be replaced with Readarr's quality/ranking system if that works well
def rank_sources(sources: list[ProwlarrSource]) -> list[ProwlarrSource]:
    """Score sources by seeder count and return them best-scored first.

    Sources are walked from most to fewest seeders. A source that has at
    least ``MIN_SEEDERS`` seeders and a seeder/leecher ratio of at least
    ``MIN_SEED_RATIO`` gets a ``download_score`` that shrinks with its
    position; any other source keeps the score it already had.

    The ``download_score`` attribute of the passed-in objects is mutated.
    """
    by_seeders = sorted(sources, key=lambda s: s.seeders, reverse=True)
    total = len(by_seeders)

    for position, candidate in enumerate(by_seeders):
        # Treat "no leechers" as one leecher so the ratio stays defined.
        effective_leechers = max(candidate.leechers, 1)
        healthy = (
            candidate.seeders >= MIN_SEEDERS
            and candidate.seeders / effective_leechers >= MIN_SEED_RATIO
        )
        if healthy:
            candidate.download_score = total - position

    return sorted(sources, key=lambda s: s.download_score, reverse=True)
+3
View File
@@ -162,6 +162,9 @@ async def query_prowlarr(
leechers=result["leechers"],
size=result["size"],
info_url=result["infoUrl"],
indexer_flags=[x.lower() for x in result.get("indexerFlags", [])],
download_url=result.get("downloadUrl"),
magnet_url=result.get("magnetUrl"),
publish_date=datetime.fromisoformat(result["publishDate"]),
)
)
+2 -2
View File
@@ -6,7 +6,7 @@ import pydantic
from sqlmodel import Session, select
from app.models import BookRequest, Indexer, ProwlarrSource
from app.util.download_ranking import rank_sources
from app.util.ranking.download_ranking import rank_sources
from app.util.prowlarr import get_indexers, query_prowlarr, start_download
from app.util.prowlarr import prowlarr_config
@@ -69,7 +69,7 @@ async def query_sources(
else:
indexers = {}
ranked = rank_sources(sources)
ranked = await rank_sources(session, client_session, sources, book)
# start download if requested
if (
+126
View File
@@ -0,0 +1,126 @@
import asyncio
from aiohttp import ClientSession
import pydantic
from sqlmodel import Session
from app.models import BookRequest, ProwlarrSource
from app.util.ranking.quality import QualityProfile
from app.util.ranking.quality_extract import Quality, extract_qualities
from functools import cmp_to_key
class RankSource(pydantic.BaseModel):
    """A Prowlarr source paired with one quality extracted for it."""

    source: ProwlarrSource
    quality: Quality
async def rank_sources(
    session: Session,
    client_session: ClientSession,
    sources: list[ProwlarrSource],
    book: BookRequest,
) -> list[ProwlarrSource]:
    """Return the sources ordered by ``CompareSource`` for ``book``.

    Qualities are extracted for all sources concurrently. Each source
    contributes one entry per extracted quality, so a source without any
    extracted quality is absent from the result and a source with several
    qualities appears several times.
    """
    # TODO: quality profile should be fetched from the config/db
    profile = QualityProfile()

    async def expand(source: ProwlarrSource) -> list[RankSource]:
        found = await extract_qualities(session, client_session, source, book)
        return [RankSource(source=source, quality=quality) for quality in found]

    per_source = await asyncio.gather(*[expand(source) for source in sources])

    candidates: list[RankSource] = []
    for group in per_source:
        candidates.extend(group)

    comparator = CompareSource(profile, book)
    # TODO: check if the ordering is working as expected
    ordered = sorted(candidates, key=cmp_to_key(comparator))

    return [candidate.source for candidate in ordered]
class CompareSource:
def __init__(self, quality_profile: QualityProfile, book: BookRequest):
self.book = book
self.quality_profile = quality_profile
def __call__(self, a: RankSource, b: RankSource):
return self.compare(a, b)
def compare(self, a: RankSource, b: RankSource) -> int:
# TODO: allow customizing of order
return self._compare_quality(a, b)
def _is_valid_quality(self, a: RankSource) -> bool:
match a.quality.file_format:
case "flac":
quality_range = self.quality_profile.FLAC
case "m4b":
quality_range = self.quality_profile.M4B
case "mp3":
quality_range = self.quality_profile.MP3
case "unknown":
quality_range = self.quality_profile.UNKNOWN
return quality_range[0] < a.quality.kbits < quality_range[1]
def _compare_quality(self, a: RankSource, b: RankSource) -> int:
a_valid = self._is_valid_quality(a)
b_valid = self._is_valid_quality(b)
if a_valid and not b_valid:
return -1
if not a_valid and b_valid:
return 1
return self._compare_format(a, b)
def _compare_format(self, a: RankSource, b: RankSource) -> int:
if a.quality.file_format == b.quality.file_format:
return self._compare_flags(a, b)
a_index = self.quality_profile.get_quality_rank(a.quality.file_format)
b_index = self.quality_profile.get_quality_rank(b.quality.file_format)
return a_index - b_index
def _compare_flags(self, a: RankSource, b: RankSource) -> int:
a_score = sum(
points
for flag, points in self.quality_profile.flags
if flag.lower() in a.source.indexer_flags
)
b_score = sum(
points
for flag, points in self.quality_profile.flags
if flag.lower() in b.source.indexer_flags
)
if a_score == b_score:
return self._compare_indexer(a, b)
return a_score - b_score
def _compare_indexer(self, a: RankSource, b: RankSource) -> int:
a_index = self.quality_profile.get_indexer_rank(a.source.indexer_id)
b_index = self.quality_profile.get_indexer_rank(b.source.indexer_id)
if a_index == b_index:
return self._compare_by_name(a, b)
return a_index - b_index
def _compare_by_name(self, a: RankSource, b: RankSource) -> int:
a_score = add_scores(self.book, a)
b_score = add_scores(self.book, b)
if a_score == b_score:
return self._compare_seeders(a, b)
return b_score - a_score
def _compare_seeders(self, a: RankSource, b: RankSource) -> int:
return b.source.seeders - a.source.seeders
def add_scores(book: BookRequest, a: RankSource) -> int:
    """Score how well the title of a source matches the requested book.

    Matching is a case-insensitive substring test against the source title:
    -100 when the book title is missing, +10 per author found and +20 per
    narrator found (narrators that are also listed as authors are not
    counted a second time).
    """
    # Release names frequently differ from catalogue metadata only in letter
    # case, so compare case-folded text.
    release_title = a.source.title.casefold()

    score = 0
    if book.title.casefold() not in release_title:
        score -= 100

    for author in book.authors:
        if author.casefold() in release_title:
            score += 10

    for narrator in book.narrators:
        if narrator not in book.authors and narrator.casefold() in release_title:
            score += 20

    return score
+32
View File
@@ -0,0 +1,32 @@
import math
from typing import Literal
import pydantic
# File formats the ranking distinguishes between.
FileFormat = Literal["flac", "m4b", "mp3", "unknown"]
class QualityProfile(pydantic.BaseModel):
    """Preferences that decide how download sources are ranked."""

    # Per-format (lower, upper) bitrate bounds.
    FLAC: tuple[float, float] = (0, math.inf)
    M4B: tuple[float, float] = (0, math.inf)
    MP3: tuple[float, float] = (0, math.inf)
    UNKNOWN: tuple[float, float] = (0, math.inf)

    # (flag, points) pairs.
    flags: list[tuple[str, int]] = []

    format_order: list[FileFormat] = ["flac", "m4b", "mp3", "unknown"]
    """Order of file formats from highest to lowest quality"""
    indexer_order: list[int] = []
    """Order of indexers from highest to lowest quality"""

    def get_quality_rank(self, file_format: FileFormat) -> int:
        """Return the index of ``file_format`` in ``format_order``.

        A format that is not listed ranks after every listed one.
        """
        try:
            return self.format_order.index(file_format)
        except ValueError:
            return len(self.format_order)

    def get_indexer_rank(self, indexer_id: int) -> int:
        """Return the index of ``indexer_id`` in ``indexer_order``.

        An indexer that is not listed ranks after every listed one.
        """
        try:
            return self.indexer_order.index(indexer_id)
        except ValueError:
            # Fall back to the length of the list that was searched, so an
            # unlisted indexer can never tie with or outrank a listed one.
            return len(self.indexer_order)
+137
View File
@@ -0,0 +1,137 @@
# pyright: basic
from collections import defaultdict
from aiohttp import ClientSession
import aiohttp
import pydantic
from sqlmodel import Session
from app.util.prowlarr import prowlarr_config
import torrent_parser as tp
import os
from app.models import BookRequest, ProwlarrSource
from app.util.ranking.quality import FileFormat
class Quality(pydantic.BaseModel):
    """Bitrate estimate for one file format."""

    kbits: float
    file_format: FileFormat
# Lower-case file extensions (including the leading dot) that are treated as
# audio files when the contents of a torrent are inspected.
audio_file_formats = [
".3gp",
".aa",
".aac",
".aax",
".act",
".aiff",
".alac",
".amr",
".ape",
".au",
".awb",
".dss",
".dvf",
".flac",
".gsm",
".iklax",
".ivs",
".m4a",
".m4b",
".m4p",
".mmf",
".movpkg",
".mp3",
".mpc",
".msv",
".nmf",
".ogg",
".oga",
".mogg",
".opus",
".ra",
".rm",
".raw",
".rf64",
".sln",
".tta",
".voc",
".vox",
".wav",
".wma",
".wv",
".webm",
".8svx",
".cda",
]
async def extract_qualities(
    session: Session,
    client_session: ClientSession,
    source: ProwlarrSource,
    book: BookRequest,
) -> list[Quality]:
    """Estimate the bitrate of every audio format contained in a source.

    The torrent file behind ``source.download_url`` is downloaded and
    decoded, the sizes of its audio files are summed per format, and each
    sum is divided by the runtime of ``book``.

    Returns:
        One ``Quality`` per audio format found. The list is empty when the
        book has no positive runtime, the source has no download URL, the
        download keeps answering with status 500 or redirects to a non-HTTP
        URL, the response is empty, or the torrent has no usable file list.

    Raises:
        ValueError: If no Prowlarr API key is configured.
    """
    api_key = prowlarr_config.get_api_key(session)
    if not api_key:
        raise ValueError("Prowlarr API key not set")

    book_seconds = book.runtime_length_min * 60
    if book_seconds <= 0:
        # No bitrate can be derived without a runtime; bail out before
        # downloading anything instead of dividing by zero later.
        return []

    if not source.download_url:
        # TODO: use the magnet url to fetch the file information
        return []

    data = None
    try:
        # Retry a few times on a 500 response.
        for _ in range(3):
            async with client_session.get(
                source.download_url,
                headers={"X-Api-Key": api_key},
            ) as response:
                if response.status == 500:
                    continue
                data = await response.read()
                break
        else:
            # Every attempt ended in a 500.
            return []
    except aiohttp.NonHttpUrlRedirectClientError as e:
        print(e.args)  # tuple. first element is a magnet link
        return []

    if not data:
        return []

    # TODO: correctly fix wrong torrent parsing
    parsed = tp.decode(data, hash_fields={"pieces": (1, False)})
    if "info" not in parsed:
        return []
    info = parsed["info"]

    # Collect (file name, size in bytes) pairs. Multi-file torrents list
    # their content under "files"; single-file torrents carry "name" and
    # "length" directly in the info dictionary.
    entries: list[tuple[str, int]]
    if "files" in info:
        entries = [(f["path"][-1], f["length"]) for f in info["files"]]
    elif "name" in info and "length" in info:
        entries = [(info["name"], info["length"])]
    else:
        return []

    dedicated_formats: dict[str, FileFormat] = {
        ".flac": "flac",
        ".m4b": "m4b",
        ".mp3": "mp3",
    }
    actual_sizes: dict[FileFormat, int] = defaultdict(int)
    for path, size in entries:
        ext = os.path.splitext(path)[1].lower()
        if ext in dedicated_formats:
            actual_sizes[dedicated_formats[ext]] += size
        elif ext in audio_file_formats:
            actual_sizes["unknown"] += size

    # bytes / seconds -> bytes per second; / 1024 * 8 -> kbit per second.
    return [
        Quality(kbits=total / book_seconds / 1024 * 8, file_format=file_format)
        for file_format, total in actual_sizes.items()
    ]
+1
View File
@@ -52,6 +52,7 @@ sniffio==1.3.1
SQLAlchemy==2.0.38
sqlmodel==0.0.22
starlette==0.45.3
torrent-parser==0.4.1
typer==0.15.1
typing_extensions==4.12.2
uvicorn==0.34.0