class MamMisconfigured(ValueError):
    """Raised when MyAnonamouse (MAM) is used without a stored session id."""


# The only configuration keys MamConfig reads or writes.
MamConfigKey = Literal["mam_session_id", "mam_source_ttl", "mam_active"]


class MamConfig(StringConfigCache[MamConfigKey]):
    """Typed accessors for the MAM settings, built on ``StringConfigCache``."""

    def raise_if_invalid(self, session: Session):
        """Raise ``MamMisconfigured`` unless a non-empty session id is stored."""
        # Delegate to is_valid so both checks can never drift apart.
        if not self.is_valid(session):
            raise MamMisconfigured("mam_id not set")

    def is_valid(self, session: Session) -> bool:
        """Return True when a non-empty session id is stored."""
        # A single lookup is enough: both None and "" are falsy.
        return bool(self.get_session_id(session))

    def get_session_id(self, session: Session) -> Optional[str]:
        """Return the stored ``mam_session_id`` value, if any."""
        return self.get(session, "mam_session_id")

    def set_mam_id(self, session: Session, mam_id: str):
        """Store ``mam_id`` under the ``mam_session_id`` key."""
        self.set(session, "mam_session_id", mam_id)

    def get_source_ttl(self, session: Session) -> int:
        """Return the cache TTL for MAM search results.

        Falls back to ``24 * 60 * 60`` when unset (presumably seconds, i.e.
        one day -- confirm against ``SimpleCache.get``).
        """
        return self.get_int(session, "mam_source_ttl", 24 * 60 * 60)

    def set_source_ttl(self, session: Session, source_ttl: int):
        """Store the cache TTL for MAM search results."""
        self.set_int(session, "mam_source_ttl", source_ttl)

    def is_active(self, session: Session) -> bool:
        """Return True only when the stored flag is exactly ``"True"``."""
        return self.get(session, "mam_active") == "True"

    def set_active(self, session: Session, state: bool):
        """Store the activation flag as ``str(state)``."""
        self.set(session, "mam_active", str(state))


mam_config = MamConfig()
mam_source_cache = SimpleCache[dict[str, TorrentSource]]()


def flush_mam_cache():
    """Flush the module-level MAM source cache."""
    mam_source_cache.flush()


# Backward-compatible alias for the original mixed-case name.
flush_Mam_cache = flush_mam_cache


# Downloading is still handled via prowlarr.
def _parse_people(raw: Optional[str]) -> list[str]:
    """Decode a JSON object string and return its values; empty input gives []."""
    if not raw:
        return []
    # Only the values (the names) are kept; the keys are discarded.
    return list(json.loads(raw).values())


async def query_mam(
    session: Session,
    client_session: ClientSession,
    query: Optional[str],
    force_refresh: bool = False,
) -> dict[str, TorrentSource]:
    """Search MyAnonamouse for ``query`` and return the hits keyed by guid.

    Args:
        session: Database session used to read the MAM configuration.
        client_session: aiohttp session used for the HTTP request.
        query: Search text (book title + author(s)); falsy yields ``{}``.
        force_refresh: Skip the cache lookup and always query MAM.

    Returns:
        Mapping of ``https://www.myanonamouse.net/t/<id>`` to the matching
        ``TorrentSource``. Empty when there is no query, the request fails,
        or the response carries no ``data`` entry.

    Raises:
        MamMisconfigured: If no session id is configured.
    """
    if not query:
        return {}

    session_id = mam_config.get_session_id(session)
    if not session_id:
        # Explicit raise rather than `assert`: asserts are stripped under
        # `python -O`, and an empty id is just as unusable as a missing one.
        raise MamMisconfigured("mam_id not set")

    cache_key = "mam_" + query
    if not force_refresh:
        source_ttl = mam_config.get_source_ttl(session)
        cached_sources = mam_source_cache.get(source_ttl, cache_key)
        if cached_sources:
            return cached_sources

    params: dict[str, Any] = {
        "tor[text]": query,  # book title + author(s)
        "tor[main_cat]": [13],  # MAM audiobook category
        "tor[searchIn]": "torrents",
        "tor[srchIn][author]": "true",
        "tor[srchIn][title]": "true",
        "tor[searchType]": "active",  # only search for torrents with at least 1 seeder.
        "startNumber": 0,
        "perpage": 100,
    }

    base_url = "https://www.myanonamouse.net"
    url = urljoin(
        base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}"
    )

    logger.info("Querying Mam: %s", url)

    async with client_session.get(url, cookies={"mam_id": session_id}) as response:
        if not response.ok:
            # MAM only enriches metadata (downloads go through prowlarr), so
            # a failed lookup degrades to "no extra data" instead of
            # aborting the caller. Failures are not cached.
            logger.error("Mam query failed with status %s", response.status)
            return {}
        search_results = await response.json()

    # NOTE(review): presumably MAM omits "data" when nothing matched or the
    # session was rejected -- confirm against the MAM API.
    results = search_results.get("data", [])
    if not results:
        logger.info("Mam returned no results: %s", search_results)

    # Storing in dict for faster retrieval by guid
    sources: dict[str, TorrentSource] = {}
    for result in results:
        # The torrent page URL doubles as guid, info_url and dict key.
        torrent_url = f'https://www.myanonamouse.net/t/{result["id"]}'
        # TODO reduce to just authors / narrator unless there is a use for the other data.
        # TODO differentiate between freeleech and VIP freeleech; available
        # flags in result: [free, fl_vip, personal_freeleech]
        sources[torrent_url] = TorrentSource(
            protocol="torrent",
            guid=torrent_url,
            indexer_id=-1,  # We don't know MAM's id within prowlarr.
            indexer="MyAnonamouse",
            title=result["title"],
            seeders=result.get("seeders", 0),
            leechers=result.get("leechers", 0),
            size=-1,
            info_url=torrent_url,
            indexer_flags=(
                ["freeleech"] if result.get("personal_freeleech") == 1 else []
            ),
            publish_date=datetime.fromisoformat(result["added"]),
            authors=_parse_people(result.get("author_info")),
            narrators=_parse_people(result.get("narrator_info")),
        )

    mam_source_cache.set(sources, cache_key)

    return sources


def inject_mam_metadata(
    prowlarrData: list[ProwlarrSource], mamData: dict[str, TorrentSource]
) -> list[ProwlarrSource]:
    """Copy authors and narrators from MAM hits onto matching Prowlarr sources.

    Sources are matched by ``guid``. ``prowlarrData`` is mutated in place and
    the same list is returned; sources without a MAM match are left untouched.
    """
    for source in prowlarrData:
        mam_source = mamData.get(source.guid)
        if mam_source is None:
            continue
        source.authors = mam_source.authors
        source.narrators = mam_source.narrators

    return prowlarrData
if mam_config.is_active(session): + mam_config.raise_if_invalid(session) + + mam_sources = await query_mam( + session, + client_session, + query, + force_refresh=force_refresh, + ) + sources = inject_mam_metadata(prowlarrData=sources, mamData=mam_sources) ranked = await rank_sources(session, client_session, sources, book) diff --git a/app/internal/ranking/download_ranking.py b/app/internal/ranking/download_ranking.py index ac17924..6cd81db 100644 --- a/app/internal/ranking/download_ranking.py +++ b/app/internal/ranking/download_ranking.py @@ -178,15 +178,29 @@ class CompareSource: return int(b_title) - int(a_title) def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int: - a_score = vaguely_exist_in_title( - self.book.authors, - a.source.title, - quality_config.get_name_exists_ratio(self.session), + a_score = max( + vaguely_exist_in_title( + self.book.authors, + a.source.title, + quality_config.get_name_exists_ratio(self.session), + ), + fuzzy_author_narrator_match( + a.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), ) - b_score = vaguely_exist_in_title( - self.book.authors, - b.source.title, - quality_config.get_name_exists_ratio(self.session), + b_score = max( + vaguely_exist_in_title( + self.book.authors, + b.source.title, + quality_config.get_name_exists_ratio(self.session), + ), + fuzzy_author_narrator_match( + b.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) @@ -195,15 +209,29 @@ class CompareSource: def _compare_narrators( self, a: RankSource, b: RankSource, next_compare: int ) -> int: - a_score = vaguely_exist_in_title( - self.book.narrators, - a.source.title, - quality_config.get_name_exists_ratio(self.session), + a_score = max( + vaguely_exist_in_title( + self.book.narrators, + a.source.title, + 
def fuzzy_author_narrator_match(
    source_people: list[str], book_people: list[str], name_exists_ratio: int
) -> int:
    """Count the book people that fuzzily match at least one source person.

    Args:
        source_people: Author/narrator names attached to the source.
        book_people: Author/narrator names of the requested book.
        name_exists_ratio: Score a comparison must exceed (strictly) to count.

    Returns:
        Number of entries in ``book_people`` with at least one entry in
        ``source_people`` whose ``fuzz.token_set_ratio`` score is above the
        threshold; 0 when either list is empty.
    """
    if not source_people or not book_people:
        return 0

    # any() stops at the first source person above the threshold; the
    # outcome matches comparing the best score against the threshold.
    return sum(
        1
        for book_person in book_people
        if any(
            fuzz.token_set_ratio(
                book_person, source_person, processor=utils.default_process
            )
            > name_exists_ratio
            for source_person in source_people
        )
    )


def get_intersection_length(a: list[str], b: list[str]) -> int:
    """Return how many distinct strings occur in both ``a`` and ``b``."""
    # set.intersection accepts any iterable, so ``b`` needs no second set.
    return len(set(a).intersection(b))
def _set_mam_active(session: Session, state: bool) -> Response:
    """Persist the MAM activation flag and build the shared 204 response."""
    mam_config.set_active(session, state)
    # HX-Refresh is the htmx response header that requests a full page reload.
    return Response(status_code=204, headers={"HX-Refresh": "true"})


@router.put("/mam/mam_id")
def update_mam_id(
    mam_id: Annotated[str, Form()],
    session: Annotated[Session, Depends(get_session)],
    admin_user: Annotated[
        DetailedUser, Depends(get_authenticated_user(GroupEnum.admin))
    ],
):
    """Store the submitted MAM session id and ask the client to refresh.

    ``admin_user`` is unused in the body; it is present for its
    ``get_authenticated_user(GroupEnum.admin)`` dependency.
    """
    # The value is pasted from a browser cookie and is later sent back as a
    # cookie, so surrounding whitespace or newlines are trimmed before saving.
    mam_config.set_mam_id(session, mam_id.strip())
    return Response(status_code=204, headers={"HX-Refresh": "true"})


@router.put("/mam/activate")
def activate_mam(
    session: Annotated[Session, Depends(get_session)],
    admin_user: Annotated[
        DetailedUser, Depends(get_authenticated_user(GroupEnum.admin))
    ],
):
    """Set the MAM activation flag to True and ask the client to refresh."""
    return _set_mam_active(session, True)


@router.put("/mam/deactivate")
def deactivate_mam(
    session: Annotated[Session, Depends(get_session)],
    admin_user: Annotated[
        DetailedUser, Depends(get_authenticated_user(GroupEnum.admin))
    ],
):
    """Set the MAM activation flag to False and ask the client to refresh."""
    return _set_mam_active(session, False)