diff --git a/app/internal/indexers/mam.py b/app/internal/indexers/mam.py new file mode 100644 index 0000000..e71fdc4 --- /dev/null +++ b/app/internal/indexers/mam.py @@ -0,0 +1,154 @@ +import json +import logging +from datetime import datetime +from typing import Any, Literal, Optional +from urllib.parse import urlencode, urljoin + +from aiohttp import ClientSession +from sqlmodel import Session + +from app.internal.models import ( + TorrentSource, + ProwlarrSource, +) +from app.util.cache import SimpleCache, StringConfigCache + +logger = logging.getLogger(__name__) + + +class MamMisconfigured(ValueError): + pass + + +MamConfigKey = Literal["mam_session_id", "mam_source_ttl", "mam_active"] + + +class MamConfig(StringConfigCache[MamConfigKey]): + def raise_if_invalid(self, session: Session): + if not self.get_session_id(session): + raise MamMisconfigured("mam_id not set") + + def is_valid(self, session: Session) -> bool: + return ( + self.get_session_id(session) is not None + and self.get_session_id(session) != "" + ) + + def get_session_id(self, session: Session) -> Optional[str]: + return self.get(session, "mam_session_id") + + def set_mam_id(self, session: Session, mam_id: str): + self.set(session, "mam_session_id", mam_id) + + def get_source_ttl(self, session: Session) -> int: + return self.get_int(session, "mam_source_ttl", 24 * 60 * 60) + + def set_source_ttl(self, session: Session, source_ttl: int): + self.set_int(session, "mam_source_ttl", source_ttl) + + def is_active(self, session: Session) -> bool: + return self.get(session, "mam_active") == "True" + + def set_active(self, session: Session, state: bool): + self.set(session, "mam_active", str(state)) + + +mam_config = MamConfig() +mam_source_cache = SimpleCache[dict[str, TorrentSource]]() + + +def flush_Mam_cache(): + mam_source_cache.flush() + + +# Downloading is still handled via prowlarr. + + +async def query_mam( + session: Session, + client_session: ClientSession, + query: Optional[str], + force_refresh: bool = False, +) -> dict[str, TorrentSource]: + if not query: + return dict() + + session_id = mam_config.get_session_id(session) + assert session_id is not None + + if not force_refresh: + source_ttl = mam_config.get_source_ttl(session) + cached_sources = mam_source_cache.get(source_ttl, "mam_" + query) + if cached_sources: + return cached_sources + params: dict[str, Any] = { + "tor[text]": query, # book title + author(s) + "tor[main_cat]": [13], # MAM audiobook category + "tor[searchIn]": "torrents", + "tor[srchIn][author]": "true", + "tor[srchIn][title]": "true", + "tor[searchType]": "active", # only search for torrents with at least 1 seeder. + "startNumber": 0, + "perpage": 100, + } + + base_url = "https://www.myanonamouse.net" + url = urljoin( + base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}" + ) + + logger.info("Querying Mam: %s", url) + + async with client_session.get(url, cookies={"mam_id": session_id}) as response: + search_results = await response.json() + # Storing in dict for faster retrieval by guid + sources: dict[str, TorrentSource] = dict() + + for result in search_results["data"]: + # TODO reduce to just authors / narrator unless there is a use for the other data. + sources.update( + { + f'https://www.myanonamouse.net/t/{result["id"]}': TorrentSource( + protocol="torrent", + guid=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_id=-1, # We don't know MAM's id within prowlarr. + indexer="MyAnonamouse", + title=result["title"], + seeders=result.get("seeders", 0), + leechers=result.get("leechers", 0), + size=-1, + info_url=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_flags=( + ["freeleech"] if result["personal_freeleech"] == 1 else [] + ), # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] + publish_date=datetime.fromisoformat(result["added"]), + authors=( + list(json.loads(result["author_info"]).values()) + if result["author_info"] + else [] + ), + narrators=( + list(json.loads(result["narrator_info"]).values()) + if result["narrator_info"] + else [] + ), + ) + } + ) + + mam_source_cache.set(sources, "mam_" + query) + + return sources + + +def inject_mam_metadata( + prowlarrData: list[ProwlarrSource], mamData: dict[str, TorrentSource] +) -> list[ProwlarrSource]: + for p in prowlarrData: + m = mamData.get(p.guid) + if m is None: + continue + p.authors = m.authors + p.narrators = m.narrators + + return prowlarrData diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py deleted file mode 100644 index 1d70305..0000000 --- a/app/internal/mam/mam.py +++ /dev/null @@ -1,147 +0,0 @@ -import json -import logging -from datetime import datetime -from typing import Any, Literal, Optional, Dict -from urllib.parse import urlencode, urljoin - -from aiohttp import ClientSession -from sqlmodel import Session - -from app.internal.models import ( - TorrentSource, - ProwlarrSource, -) -from app.util.cache import SimpleCache, StringConfigCache - -logger = logging.getLogger(__name__) - - -class MamMisconfigured(ValueError): - pass - - -MamConfigKey = Literal[ - "mam_session_id", - "mam_source_ttl", - "mam_active" -] - - -class MamConfig(StringConfigCache[MamConfigKey]): - def raise_if_invalid(self, session: Session): - if not self.get_session_id(session): - raise MamMisconfigured("mam_id not set") - - def is_valid(self, session: Session) -> bool: - return ( - self.get_session_id(session) is not None and self.get_session_id(session)!="" - ) - - def get_session_id(self, session: Session) -> Optional[str]: - return self.get(session, "mam_session_id") - - def set_mam_id(self, session: Session, mam_id: str): - self.set(session, "mam_session_id", mam_id) - def get_source_ttl(self, session: Session) -> int: - return self.get_int(session, "mam_source_ttl", 24 * 60 * 60) - - def set_source_ttl(self, session: Session, source_ttl: int): - self.set_int(session, "mam_source_ttl", source_ttl) - def is_active(self, session: Session) -> bool: - return self.get(session, "mam_active")=="True" - def set_active(self, session: Session, state: bool): - self.set(session, "mam_active", str(state)) - - -mam_config = MamConfig() -mam_source_cache = SimpleCache[dict[str, TorrentSource]]() - - -def flush_Mam_cache(): - mam_source_cache.flush() - -# Downloading is still handled via prowlarr. - -async def query_mam( - session: Session, - query: Optional[str], - force_refresh: bool = False, -) -> dict[str, TorrentSource]: - if not query: - return dict() - - session_id = mam_config.get_session_id(session) - assert session_id is not None - - if not force_refresh: - source_ttl = mam_config.get_source_ttl(session) - cached_sources = mam_source_cache.get(source_ttl,"mam_"+query) - if cached_sources: - return cached_sources - params: dict[str, Any] = { - "tor[text]": query, # book title + author(s) - - "tor[main_cat]": [13], - "tor[searchIn]": "torrents", - 'tor[srchIn][author]': 'true', - 'tor[srchIn][title]': 'true', - 'tor[searchType]': 'active', - "startNumber": 0, - "perpage": 100, - } - - base_url = "https://www.myanonamouse.net" - url = urljoin(base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}") - - logger.info("Querying Mam: %s", url) - async with ClientSession() as client_session: - - async with client_session.get( - url, - cookies={"mam_id":session_id} - ) as response: - search_results = await response.json() - # Storing in dict for faster retrieval by guid - sources : Dict[str,TorrentSource] = dict() - - for result in search_results["data"]: - # TODO reduce to just authors / narrator unless there is a use for the other data. - sources.update({ - f'https://www.myanonamouse.net/t/{result["id"]}': - TorrentSource( - protocol="torrent", - guid=f'https://www.myanonamouse.net/t/{result["id"]}', - indexer_id=-1, # We don't know MAM's id within prowlarr. - indexer="MyAnonamouse", - title=result["title"], - seeders=result.get("seeders", 0), - leechers=result.get("leechers", 0), - size=-1, - info_url=f'https://www.myanonamouse.net/t/{result["id"]}', - indexer_flags=["freeleech"] if result["personal_freeleech"]==1 else [], # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] - publish_date=datetime.fromisoformat(result["added"]), - authors=list(json.loads(result["author_info"]).values() ) if result["author_info"] else [], - narrators=list(json.loads(result["narrator_info"]).values()) if result["narrator_info"] else [] - ) - } - ) - - - mam_source_cache.set(sources, "mam_" + query) - - return sources - - -def inject_mam_metadata(prowlarrData: list[ProwlarrSource], mamData: Dict[str,TorrentSource]) -> list[ProwlarrSource]: - print(mamData) - for p in prowlarrData: - m =mamData.get(p.guid) - if m is None: - print("Not found: ", p.title, p.guid) - continue - p.authors= m.authors - p.narrators = m.narrators - print(m.authors, m.narrators, p.title) - - return prowlarrData - diff --git a/app/internal/query.py b/app/internal/query.py index 468e0c9..4700f95 100644 --- a/app/internal/query.py +++ b/app/internal/query.py @@ -13,11 +13,7 @@ from app.internal.prowlarr.prowlarr import ( start_download, ) -from app.internal.mam.mam import ( - mam_config, - query_mam, - inject_mam_metadata -) +from app.internal.indexers.mam import mam_config, query_mam, inject_mam_metadata from app.internal.ranking.download_ranking import rank_sources querying: set[str] = set() @@ -72,10 +68,11 @@ async def query_sources( mam_sources = await query_mam( session, + client_session, query, force_refresh=force_refresh, ) - sources = inject_mam_metadata(prowlarrData=sources,mamData=mam_sources) + sources = inject_mam_metadata(prowlarrData=sources, mamData=mam_sources) ranked = await rank_sources(session, client_session, sources, book) diff --git a/app/internal/ranking/download_ranking.py b/app/internal/ranking/download_ranking.py index c80acf3..6cd81db 100644 --- a/app/internal/ranking/download_ranking.py +++ b/app/internal/ranking/download_ranking.py @@ -10,7 +10,7 @@ from sqlmodel import Session from app.internal.models import BookRequest, ProwlarrSource from app.internal.ranking.quality import quality_config from app.internal.ranking.quality_extract import Quality, extract_qualities -from app.internal.mam.mam import mam_config + class RankSource(pydantic.BaseModel): source: ProwlarrSource @@ -178,22 +178,30 @@ class CompareSource: return int(b_title) - int(a_title) def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int: - if(mam_config.is_active(self.session) and (a.source.authors!=[] or b.source.authors!=[])): - a_score = get_intersection_length(a.source.authors, self.book.authors) - b_score = get_intersection_length(b.source.authors, self.book.authors) - else: - - - a_score = vaguely_exist_in_title( + a_score = max( + vaguely_exist_in_title( self.book.authors, a.source.title, quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( + ), + fuzzy_author_narrator_match( + a.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), + ) + b_score = max( + vaguely_exist_in_title( self.book.authors, b.source.title, quality_config.get_name_exists_ratio(self.session), - ) + ), + fuzzy_author_narrator_match( + b.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -201,20 +209,30 @@ class CompareSource: def _compare_narrators( self, a: RankSource, b: RankSource, next_compare: int ) -> int: - if(mam_config.is_active(self.session) and (a.source.narrators!=[] or b.source.narrators!=[])): - a_score = get_intersection_length(a.source.authors, self.book.authors) - b_score = get_intersection_length(b.source.authors, self.book.authors) - else: - a_score = vaguely_exist_in_title( + a_score = max( + vaguely_exist_in_title( self.book.narrators, a.source.title, quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( + ), + fuzzy_author_narrator_match( + a.source.narrators, + self.book.narrators, + quality_config.get_name_exists_ratio(self.session), + ), + ) + b_score = max( + vaguely_exist_in_title( self.book.narrators, b.source.title, quality_config.get_name_exists_ratio(self.session), - ) + ), + fuzzy_author_narrator_match( + b.source.narrators, + self.book.narrators, + quality_config.get_name_exists_ratio(self.session), + ), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -234,7 +252,29 @@ class CompareSource: return int((a.source.publish_date - b.source.publish_date).total_seconds()) # With torrents: older => better return int((b.source.publish_date - a.source.publish_date).total_seconds()) - + + +def fuzzy_author_narrator_match( + source_people: list[str], book_people: list[str], name_exists_ratio: int +) -> int: + """Calculate a fuzzy matching score between two lists of author/narrator names.""" + if not source_people or not book_people: + return 0 + score = 0 + for book_person in book_people: + best_match = 0 + for source_person in source_people: + match_score = fuzz.token_set_ratio( + book_person, source_person, processor=utils.default_process + ) + best_match = max(best_match, match_score) + + # Only count matches above threshold + if best_match > name_exists_ratio: + score += 1 + + return score + def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) -> int: return sum( @@ -243,12 +283,12 @@ def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) if fuzz.token_set_ratio(w, title, processor=utils.default_process) > name_exists_ratio ) -def get_intersection_length(a : list[str],b: list[str]): + + +def get_intersection_length(a: list[str], b: list[str]): return len(set(a).intersection(set(b))) - - def exists_in_title(word: str, title: str, title_exists_ratio: int) -> bool: return ( fuzz.partial_ratio(word, title, processor=utils.default_process) diff --git a/app/routers/settings.py b/app/routers/settings.py index 807cd58..d82323b 100644 --- a/app/routers/settings.py +++ b/app/routers/settings.py @@ -10,7 +10,7 @@ from app.internal.models import EventEnum, GroupEnum, Notification, User from app.internal.prowlarr.indexer_categories import indexer_categories from app.internal.notifications import send_notification from app.internal.prowlarr.prowlarr import flush_prowlarr_cache, prowlarr_config -from app.internal.mam.mam import mam_config +from app.internal.indexers.mam import mam_config from app.internal.ranking.quality import IndexerFlag, QualityRange, quality_config from app.util.auth import ( @@ -282,7 +282,6 @@ def read_prowlarr( "prowlarr_misconfigured": True if prowlarr_misconfigured else False, "mam_active": mam_is_active, "mam_id": mam_id, - }, ) @@ -336,6 +335,7 @@ def update_indexer_categories( block_name="category", ) + @router.put("/mam/mam_id") def update_mam_id( mam_id: Annotated[str, Form()], @@ -347,6 +347,7 @@ def update_mam_id( mam_config.set_mam_id(session, mam_id) return Response(status_code=204, headers={"HX-Refresh": "true"}) + @router.put("/mam/activate") def activate_mam( session: Annotated[Session, Depends(get_session)], @@ -357,6 +358,7 @@ def activate_mam( mam_config.set_active(session, True) return Response(status_code=204, headers={"HX-Refresh": "true"}) + @router.put("/mam/deactivate") def deactivate_mam( session: Annotated[Session, Depends(get_session)], @@ -368,7 +370,6 @@ def deactivate_mam( return Response(status_code=204, headers={"HX-Refresh": "true"}) - @router.get("/download") def read_download( request: Request, diff --git a/app/routers/wishlist.py b/app/routers/wishlist.py index 4c99f8f..3d4eb91 100644 --- a/app/routers/wishlist.py +++ b/app/routers/wishlist.py @@ -26,9 +26,7 @@ from app.internal.prowlarr.prowlarr import ( prowlarr_config, start_download, ) -from app.internal.mam.mam import ( - mam_config -) +from app.internal.indexers.mam import mam_config from app.internal.query import query_sources from app.internal.ranking.quality import quality_config from app.util.auth import DetailedUser, get_authenticated_user @@ -202,7 +200,7 @@ async def list_sources( { "book": result.book, "sources": result.sources, - "mam_active": mam_config.is_active(session) + "mam_active": mam_config.is_active(session), }, )