mirror of
https://github.com/markbeep/AudioBookRequest.git
synced 2026-04-30 13:29:38 -05:00
quality/profiles ranking of torrents
This commit is contained in:
@@ -135,6 +135,9 @@ class ProwlarrSource(BaseModel):
|
||||
size: int # in bytes
|
||||
publish_date: datetime
|
||||
info_url: str
|
||||
indexer_flags: list[str]
|
||||
download_url: Optional[str] = None
|
||||
magnet_url: Optional[str] = None
|
||||
|
||||
download_score: int = 0
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
from app.models import ProwlarrSource
|
||||
|
||||
MIN_SEEDERS = 5
MIN_SEED_RATIO = 2.0


# TODO: This could be replaced with Readarr's quality/ranking system if that works well
def rank_sources(sources: list[ProwlarrSource]) -> list[ProwlarrSource]:
    """Score sources by swarm health and return them best-first.

    Sources are walked from most to fewest seeders. A source that has at
    least ``MIN_SEEDERS`` seeders and a seeder/leecher ratio of at least
    ``MIN_SEED_RATIO`` gets ``download_score`` set to its reversed position
    in that walk; every other source keeps the score it already had. The
    input list itself is not reordered, but its items are mutated.

    Returns:
        A new list with the same sources ordered by ``download_score``,
        highest first (ties keep their input order).
    """
    by_seeders = list(sources)
    by_seeders.sort(key=lambda candidate: candidate.seeders, reverse=True)

    total = len(by_seeders)
    for position, candidate in enumerate(by_seeders):
        # Treat "no leechers" as one leecher so the ratio is always defined.
        ratio = candidate.seeders / max(candidate.leechers, 1)
        if candidate.seeders >= MIN_SEEDERS and ratio >= MIN_SEED_RATIO:
            candidate.download_score = total - position

    ranked = list(sources)
    ranked.sort(key=lambda candidate: candidate.download_score, reverse=True)
    return ranked
|
||||
@@ -162,6 +162,9 @@ async def query_prowlarr(
|
||||
leechers=result["leechers"],
|
||||
size=result["size"],
|
||||
info_url=result["infoUrl"],
|
||||
indexer_flags=[x.lower() for x in result.get("indexerFlags", [])],
|
||||
download_url=result.get("downloadUrl"),
|
||||
magnet_url=result.get("magnetUrl"),
|
||||
publish_date=datetime.fromisoformat(result["publishDate"]),
|
||||
)
|
||||
)
|
||||
|
||||
+2
-2
@@ -6,7 +6,7 @@ import pydantic
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from app.models import BookRequest, Indexer, ProwlarrSource
|
||||
from app.util.download_ranking import rank_sources
|
||||
from app.util.ranking.download_ranking import rank_sources
|
||||
from app.util.prowlarr import get_indexers, query_prowlarr, start_download
|
||||
from app.util.prowlarr import prowlarr_config
|
||||
|
||||
@@ -69,7 +69,7 @@ async def query_sources(
|
||||
else:
|
||||
indexers = {}
|
||||
|
||||
ranked = rank_sources(sources)
|
||||
ranked = await rank_sources(session, client_session, sources, book)
|
||||
|
||||
# start download if requested
|
||||
if (
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
import asyncio
|
||||
from aiohttp import ClientSession
|
||||
import pydantic
|
||||
from sqlmodel import Session
|
||||
from app.models import BookRequest, ProwlarrSource
|
||||
from app.util.ranking.quality import QualityProfile
|
||||
from app.util.ranking.quality_extract import Quality, extract_qualities
|
||||
from functools import cmp_to_key
|
||||
|
||||
|
||||
class RankSource(pydantic.BaseModel):
    """A Prowlarr source paired with one of the qualities extracted for it."""

    # The source (release) that is being ranked.
    source: ProwlarrSource
    # A single quality entry for that source; rank_sources creates one
    # RankSource per quality returned by extract_qualities.
    quality: Quality
|
||||
|
||||
|
||||
async def rank_sources(
    session: Session,
    client_session: ClientSession,
    sources: list[ProwlarrSource],
    book: BookRequest,
) -> list[ProwlarrSource]:
    """Order sources for ``book`` using the quality profile comparator.

    The qualities of all sources are extracted concurrently. Each
    (source, quality) pair becomes one ranking candidate, so a source shows
    up once per extracted quality and not at all when no quality could be
    extracted for it.

    Returns:
        The sources of the candidates, sorted with ``CompareSource``.
    """
    # TODO: quality profile should be fetched from the config/db
    profile = QualityProfile()

    async def pair_with_qualities(candidate: ProwlarrSource):
        found = await extract_qualities(session, client_session, candidate, book)
        return [RankSource(source=candidate, quality=quality) for quality in found]

    groups = await asyncio.gather(
        *(pair_with_qualities(candidate) for candidate in sources)
    )
    candidates = []
    for group in groups:
        candidates.extend(group)

    comparator = CompareSource(profile, book)
    # TODO: check if the ordering is working as expected
    ordered = sorted(candidates, key=cmp_to_key(comparator))

    return [entry.source for entry in ordered]
|
||||
|
||||
|
||||
class CompareSource:
|
||||
def __init__(self, quality_profile: QualityProfile, book: BookRequest):
|
||||
self.book = book
|
||||
self.quality_profile = quality_profile
|
||||
|
||||
def __call__(self, a: RankSource, b: RankSource):
|
||||
return self.compare(a, b)
|
||||
|
||||
def compare(self, a: RankSource, b: RankSource) -> int:
|
||||
# TODO: allow customizing of order
|
||||
return self._compare_quality(a, b)
|
||||
|
||||
def _is_valid_quality(self, a: RankSource) -> bool:
|
||||
match a.quality.file_format:
|
||||
case "flac":
|
||||
quality_range = self.quality_profile.FLAC
|
||||
case "m4b":
|
||||
quality_range = self.quality_profile.M4B
|
||||
case "mp3":
|
||||
quality_range = self.quality_profile.MP3
|
||||
case "unknown":
|
||||
quality_range = self.quality_profile.UNKNOWN
|
||||
|
||||
return quality_range[0] < a.quality.kbits < quality_range[1]
|
||||
|
||||
def _compare_quality(self, a: RankSource, b: RankSource) -> int:
|
||||
a_valid = self._is_valid_quality(a)
|
||||
b_valid = self._is_valid_quality(b)
|
||||
if a_valid and not b_valid:
|
||||
return -1
|
||||
if not a_valid and b_valid:
|
||||
return 1
|
||||
return self._compare_format(a, b)
|
||||
|
||||
def _compare_format(self, a: RankSource, b: RankSource) -> int:
|
||||
if a.quality.file_format == b.quality.file_format:
|
||||
return self._compare_flags(a, b)
|
||||
a_index = self.quality_profile.get_quality_rank(a.quality.file_format)
|
||||
b_index = self.quality_profile.get_quality_rank(b.quality.file_format)
|
||||
return a_index - b_index
|
||||
|
||||
def _compare_flags(self, a: RankSource, b: RankSource) -> int:
|
||||
a_score = sum(
|
||||
points
|
||||
for flag, points in self.quality_profile.flags
|
||||
if flag.lower() in a.source.indexer_flags
|
||||
)
|
||||
b_score = sum(
|
||||
points
|
||||
for flag, points in self.quality_profile.flags
|
||||
if flag.lower() in b.source.indexer_flags
|
||||
)
|
||||
if a_score == b_score:
|
||||
return self._compare_indexer(a, b)
|
||||
return a_score - b_score
|
||||
|
||||
def _compare_indexer(self, a: RankSource, b: RankSource) -> int:
|
||||
a_index = self.quality_profile.get_indexer_rank(a.source.indexer_id)
|
||||
b_index = self.quality_profile.get_indexer_rank(b.source.indexer_id)
|
||||
if a_index == b_index:
|
||||
return self._compare_by_name(a, b)
|
||||
return a_index - b_index
|
||||
|
||||
def _compare_by_name(self, a: RankSource, b: RankSource) -> int:
|
||||
a_score = add_scores(self.book, a)
|
||||
b_score = add_scores(self.book, b)
|
||||
if a_score == b_score:
|
||||
return self._compare_seeders(a, b)
|
||||
return b_score - a_score
|
||||
|
||||
def _compare_seeders(self, a: RankSource, b: RankSource) -> int:
|
||||
return b.source.seeders - a.source.seeders
|
||||
|
||||
|
||||
def add_scores(book: BookRequest, a: RankSource) -> int:
    """Score how well a source's title matches the requested book.

    Matching is a case-insensitive substring test against the source title,
    because release names frequently differ from the catalogue spelling only
    in capitalisation.

    Scoring:
        * -100 if the book title does not occur in the source title
        * +10 for every author that occurs in the source title
        * +20 for every narrator that occurs in the source title and is not
          also listed as an author

    Returns:
        The summed score; higher means a better match.
    """
    release_title = a.source.title.casefold()

    score = 0
    if book.title.casefold() not in release_title:
        score -= 100

    for author in book.authors:
        if author.casefold() in release_title:
            score += 10

    # Narrators who are also authors were already counted above.
    author_names = {author.casefold() for author in book.authors}
    for narrator in book.narrators:
        folded = narrator.casefold()
        if folded not in author_names and folded in release_title:
            score += 20

    return score
|
||||
@@ -0,0 +1,32 @@
|
||||
import math
|
||||
from typing import Literal
|
||||
import pydantic
|
||||
|
||||
# Labels for the audio formats the ranking code tells apart; "unknown" is the
# label used for audio files that are none of flac/m4b/mp3.
FileFormat = Literal["flac", "m4b", "mp3", "unknown"]
|
||||
|
||||
|
||||
class QualityProfile(pydantic.BaseModel):
    """Preferences used to rank download sources.

    ``FLAC``, ``M4B``, ``MP3`` and ``UNKNOWN`` are ``(lower, upper)`` bitrate
    bounds for the respective file format; the comparator treats a quality
    as acceptable when its bitrate lies strictly between the two bounds.
    ``flags`` pairs an indexer flag name with the points it contributes.
    """

    FLAC: tuple[float, float] = (0, math.inf)
    M4B: tuple[float, float] = (0, math.inf)
    MP3: tuple[float, float] = (0, math.inf)
    UNKNOWN: tuple[float, float] = (0, math.inf)

    flags: list[tuple[str, int]] = []

    format_order: list[FileFormat] = ["flac", "m4b", "mp3", "unknown"]
    """Order of file formats from highest to lowest quality"""

    indexer_order: list[int] = []
    """Order of indexers from highest to lowest quality"""

    def get_quality_rank(self, file_format: FileFormat) -> int:
        """Return the position of ``file_format`` in ``format_order``.

        Formats that are not listed rank after every listed format.
        """
        try:
            return self.format_order.index(file_format)
        except ValueError:
            return len(self.format_order)

    def get_indexer_rank(self, indexer_id: int) -> int:
        """Return the position of ``indexer_id`` in ``indexer_order``.

        Indexers that are not listed rank after every listed indexer.
        """
        try:
            return self.indexer_order.index(indexer_id)
        except ValueError:
            # Must be based on indexer_order (not format_order); otherwise an
            # unlisted indexer could outrank listed ones in a long list.
            return len(self.indexer_order)
|
||||
@@ -0,0 +1,137 @@
|
||||
# pyright: basic
|
||||
|
||||
from collections import defaultdict
|
||||
from aiohttp import ClientSession
|
||||
import aiohttp
|
||||
import pydantic
|
||||
from sqlmodel import Session
|
||||
from app.util.prowlarr import prowlarr_config
|
||||
import torrent_parser as tp
|
||||
import os
|
||||
|
||||
from app.models import BookRequest, ProwlarrSource
|
||||
from app.util.ranking.quality import FileFormat
|
||||
|
||||
|
||||
class Quality(pydantic.BaseModel):
    """Bitrate figure and file format of the audio found in a source."""

    # Bitrate derived by extract_qualities from the summed file sizes of one
    # format and the book's runtime (presumably kilobits per second; confirm).
    kbits: float
    # Which format the files counted for this entry have.
    file_format: FileFormat
|
||||
|
||||
|
||||
# File extensions (lowercase, with the leading dot) that extract_qualities
# counts as audio. Files with one of these extensions that are not
# .flac/.m4b/.mp3 are grouped under the "unknown" format.
audio_file_formats = [
    ".3gp",
    ".aa",
    ".aac",
    ".aax",
    ".act",
    ".aiff",
    ".alac",
    ".amr",
    ".ape",
    ".au",
    ".awb",
    ".dss",
    ".dvf",
    ".flac",
    ".gsm",
    ".iklax",
    ".ivs",
    ".m4a",
    ".m4b",
    ".m4p",
    ".mmf",
    ".movpkg",
    ".mp3",
    ".mpc",
    ".msv",
    ".nmf",
    ".ogg",
    ".oga",
    ".mogg",
    ".opus",
    ".ra",
    ".rm",
    ".raw",
    ".rf64",
    ".sln",
    ".tta",
    ".voc",
    ".vox",
    ".wav",
    ".wma",
    ".wv",
    ".webm",
    ".8svx",
    ".cda",
]
|
||||
|
||||
|
||||
async def extract_qualities(
    session: Session,
    client_session: ClientSession,
    source: ProwlarrSource,
    book: BookRequest,
) -> list[Quality]:
    """Estimate the audio qualities contained in a source's torrent.

    The torrent file behind ``source.download_url`` is fetched (sending the
    Prowlarr API key), the sizes of its audio files are summed per format and
    each sum is divided by the book's runtime to obtain a bitrate.

    Args:
        session: Database session used to look up the Prowlarr API key.
        client_session: HTTP session used to download the torrent file.
        source: The source whose torrent is inspected.
        book: The requested book; ``runtime_length_min`` supplies the runtime.

    Returns:
        One ``Quality`` per audio format found in the torrent. The list is
        empty when nothing can be determined: no download URL, no usable
        runtime, the download kept answering 500 or redirected to a non-HTTP
        URL, an empty response, or a torrent without file information.

    Raises:
        ValueError: If no Prowlarr API key is configured.
    """
    api_key = prowlarr_config.get_api_key(session)
    if not api_key:
        raise ValueError("Prowlarr API key not set")

    if not source.download_url:
        # TODO: use the magnet url to fetch the file information
        return []

    book_seconds = book.runtime_length_min * 60
    if book_seconds <= 0:
        # Without a runtime no bitrate can be computed; bail out instead of
        # dividing by zero further down.
        return []

    data = None
    try:
        # Retry a few times when the server answers with a 500.
        for _ in range(3):
            async with client_session.get(
                source.download_url,
                headers={"X-Api-Key": api_key},
            ) as response:
                if response.status == 500:
                    continue
                data = await response.read()
                break
        else:
            return []
    except aiohttp.NonHttpUrlRedirectClientError as e:
        print(e.args)  # tuple. first element is a magnet link
        return []

    if not data:
        return []

    # TODO: correctly fix wrong torrent parsing
    parsed = tp.decode(data, hash_fields={"pieces": (1, False)})
    if "info" not in parsed:
        return []

    info = parsed["info"]
    if "files" in info:
        files = info["files"]
    elif "length" in info and "name" in info:
        # Single-file torrents carry the file directly on the info dict
        # instead of in a "files" list.
        files = [{"length": info["length"], "path": [info["name"]]}]
    else:
        return []

    known_formats: dict[str, FileFormat] = {
        ".flac": "flac",
        ".m4b": "m4b",
        ".mp3": "mp3",
    }
    actual_sizes: dict[FileFormat, int] = defaultdict(int)
    for f in files:
        size: int = f["length"]
        path: str = f["path"][-1]
        ext = os.path.splitext(path)[1].lower()
        file_format = known_formats.get(ext)
        if file_format is None:
            if ext not in audio_file_formats:
                continue  # not an audio file
            file_format = "unknown"
        actual_sizes[file_format] += size

    # bytes / seconds -> bytes per second; / 1024 * 8 converts to kbit/s.
    # book_seconds is already in seconds and must not be scaled again.
    return [
        Quality(
            kbits=total_bytes / book_seconds / 1024 * 8,
            file_format=file_format,
        )
        for file_format, total_bytes in actual_sizes.items()
    ]
|
||||
@@ -52,6 +52,7 @@ sniffio==1.3.1
|
||||
SQLAlchemy==2.0.38
|
||||
sqlmodel==0.0.22
|
||||
starlette==0.45.3
|
||||
torrent-parser==0.4.1
|
||||
typer==0.15.1
|
||||
typing_extensions==4.12.2
|
||||
uvicorn==0.34.0
|
||||
|
||||
Reference in New Issue
Block a user