From 2bab24de578d54af19502a973ed680fbc0d8f617 Mon Sep 17 00:00:00 2001 From: Leandro Zazzi Date: Mon, 10 Mar 2025 21:08:16 +0100 Subject: [PATCH 1/5] add mam integration backend --- app/internal/mam/mam.py | 147 ++++++++++++++++++++++++++ app/internal/models.py | 2 + app/internal/query.py | 15 +++ app/routers/settings.py | 39 +++++++ templates/settings_page/prowlarr.html | 49 +++++++++ 5 files changed, 252 insertions(+) create mode 100644 app/internal/mam/mam.py diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py new file mode 100644 index 0000000..219ffcb --- /dev/null +++ b/app/internal/mam/mam.py @@ -0,0 +1,147 @@ +import json +import logging +from datetime import datetime +from typing import Any, Literal, Optional, Dict +from urllib.parse import urlencode, urljoin + +from aiohttp import ClientSession +from sqlmodel import Session + +from app.internal.models import ( + TorrentSource, + ProwlarrSource, +) +from app.util.cache import SimpleCache, StringConfigCache + +logger = logging.getLogger(__name__) + + +class MamMisconfigured(ValueError): + pass + + +MamConfigKey = Literal[ + "mam_session_id", + "mam_source_ttl", + "mam_active" +] + + +class MamConfig(StringConfigCache[MamConfigKey]): + def raise_if_invalid(self, session: Session): + if not self.get_session_id(session): + raise MamMisconfigured("mam_id not set") + + def is_valid(self, session: Session) -> bool: + return ( + self.get_session_id(session) is not None and self.get_session_id(session)!="" + ) + + def get_session_id(self, session: Session) -> Optional[str]: + return self.get(session, "mam_session_id") + + def set_mam_id(self, session: Session, mam_id: str): + self.set(session, "mam_session_id", mam_id) + def get_source_ttl(self, session: Session) -> int: + return self.get_int(session, "mam_source_ttl", 24 * 60 * 60) + + def set_source_ttl(self, session: Session, source_ttl: int): + self.set_int(session, "mam_source_ttl", source_ttl) + def is_active(self, session: Session) -> bool: + return self.get(session, "mam_active")=="True" + def set_active(self, session: Session, state: bool): + self.set(session, "mam_active", str(state)) + + +mam_config = MamConfig() +mam_source_cache = SimpleCache[dict[str, TorrentSource]]() + + +def flush_Mam_cache(): + mam_source_cache.flush() + +# Downloading is still handled via prowlarr. + +async def query_mam( + session: Session, + query: Optional[str], + force_refresh: bool = False, +) -> dict[str, TorrentSource]: + if not query: + return {} + + base_url = "https://www.myanonamouse.net" + session_id = mam_config.get_session_id(session) + assert session_id is not None + + if not force_refresh: + source_ttl = mam_config.get_source_ttl(session) + cached_sources = mam_source_cache.get(source_ttl, query) + if cached_sources: + return cached_sources + params: dict[str, Any] = { + "text": query, # book title + author(s) + "perpage": 100, + "tor": { + "main_cat": {13}, # 13 is the audiobook category on mam + "searchIn": "torrents", + "searchType": "active", # retrieve only torrents with at least 1 seed. + "srchIn": { + "title": "true", + "author": "true", + }, + }, + "startNumber": 0 #offset + } + + + url = urljoin(base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}") + + logger.info("Querying Mam: %s", url) + async with ClientSession() as client_session: + + async with client_session.get( + url, + cookies={"mam_id":mam_config.get_session_id} + ) as response: + search_results = await response.json() + sources : Dict[str,TorrentSource] = {} + + for result in search_results: + # TODO reduce to just authors / narrator unless there is a use for the other data. + sources.update({ + f'https://www.myanonamouse.net/t/{result["id"]}': + TorrentSource( + protocol="torrent", + guid=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_id=-1, # We don't know MAM's id within prowlarr. + indexer="MyAnonamouse", + title=result["title"], + seeders=result.get("seeders", 0), + leechers=result.get("leechers", 0), + size=-1, + info_url=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_flags=["freeleech"] if result["personal_freeleech"]==1 else [], # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] + publish_date=datetime.fromisoformat(result["added"]), + authors=list(json.load(result["author_info"]).values()), + narrators=list(json.load(result["narrator_info"]).values()) + ) + } + ) + + + mam_source_cache.set(sources, query) + + return sources + + +def inject_mam_metadata(prowlarrData: list[ProwlarrSource], mamData: Dict[str,TorrentSource]) -> list[ProwlarrSource]: + for p in prowlarrData: + m =mamData.get(p.guid) + if m is None: + continue + p.authors= m.authors + p.narrators = m.narrators + + return prowlarrData + diff --git a/app/internal/models.py b/app/internal/models.py index b9fead1..d7bde62 100644 --- a/app/internal/models.py +++ b/app/internal/models.py @@ -127,6 +127,8 @@ class BaseSource(BaseModel): indexer_id: int indexer: str title: str + authors: list[str] = Field(default_factory=list, sa_column=Column(JSON)) + narrators: list[str] = Field(default_factory=list, sa_column=Column(JSON)) size: int # in bytes publish_date: datetime info_url: str diff --git a/app/internal/query.py b/app/internal/query.py index 89a9c98..fae200f 100644 --- a/app/internal/query.py +++ b/app/internal/query.py @@ -12,6 +12,12 @@ from app.internal.prowlarr.prowlarr import ( query_prowlarr, start_download, ) + +from app.internal.mam.mam import ( + mam_config, + query_mam, + inject_mam_metadata +) from app.internal.ranking.download_ranking import rank_sources querying: set[str] = set() @@ -61,6 +67,15 @@ async def query_sources( query, force_refresh=force_refresh, ) + if mam_config.is_active(session): + mam_config.raise_if_invalid(session) + + mam_sources = await query_mam( + session, + query, + force_refresh=force_refresh, + ) + sources = inject_mam_metadata(sources,mam_sources) ranked = await rank_sources(session, client_session, sources, book) diff --git a/app/routers/settings.py b/app/routers/settings.py index f1a9e82..807cd58 100644 --- a/app/routers/settings.py +++ b/app/routers/settings.py @@ -10,6 +10,8 @@ from app.internal.models import EventEnum, GroupEnum, Notification, User from app.internal.prowlarr.indexer_categories import indexer_categories from app.internal.notifications import send_notification from app.internal.prowlarr.prowlarr import flush_prowlarr_cache, prowlarr_config +from app.internal.mam.mam import mam_config + from app.internal.ranking.quality import IndexerFlag, QualityRange, quality_config from app.util.auth import ( DetailedUser, @@ -264,6 +266,8 @@ def read_prowlarr( prowlarr_base_url = prowlarr_config.get_base_url(session) prowlarr_api_key = prowlarr_config.get_api_key(session) selected = set(prowlarr_config.get_categories(session)) + mam_is_active = mam_config.is_active(session) + mam_id = mam_config.get_session_id(session) return template_response( "settings_page/prowlarr.html", @@ -276,6 +280,9 @@ def read_prowlarr( "indexer_categories": indexer_categories, "selected_categories": selected, "prowlarr_misconfigured": True if prowlarr_misconfigured else False, + "mam_active": mam_is_active, + "mam_id": mam_id, + }, ) @@ -329,6 +336,38 @@ def update_indexer_categories( block_name="category", ) +@router.put("/mam/mam_id") +def update_mam_id( + mam_id: Annotated[str, Form()], + session: Annotated[Session, Depends(get_session)], + admin_user: Annotated[ + DetailedUser, Depends(get_authenticated_user(GroupEnum.admin)) + ], +): + mam_config.set_mam_id(session, mam_id) + return Response(status_code=204, headers={"HX-Refresh": "true"}) + +@router.put("/mam/activate") +def activate_mam( + session: Annotated[Session, Depends(get_session)], + admin_user: Annotated[ + DetailedUser, Depends(get_authenticated_user(GroupEnum.admin)) + ], +): + mam_config.set_active(session, True) + return Response(status_code=204, headers={"HX-Refresh": "true"}) + +@router.put("/mam/deactivate") +def deactivate_mam( + session: Annotated[Session, Depends(get_session)], + admin_user: Annotated[ + DetailedUser, Depends(get_authenticated_user(GroupEnum.admin)) + ], +): + mam_config.set_active(session, False) + return Response(status_code=204, headers={"HX-Refresh": "true"}) + + @router.get("/download") def read_download( diff --git a/templates/settings_page/prowlarr.html b/templates/settings_page/prowlarr.html index 45052e3..d8b428a 100644 --- a/templates/settings_page/prowlarr.html +++ b/templates/settings_page/prowlarr.html @@ -111,5 +111,54 @@ {% endblock %} + +

MyAnonamouse integration

+ + + {% if mam_active %} +
+ + +
+ +
+ + + +
+ + {% else %} +
+ + +
+ {% endif %} {% endblock %} From 97d284ca75ff6db3c01dcb685ffd855e8808fb4b Mon Sep 17 00:00:00 2001 From: Leandro Zazzi Date: Tue, 11 Mar 2025 20:19:57 +0100 Subject: [PATCH 2/5] fix mam backend search and add auth/narrator to frontend --- app/internal/mam/mam.py | 57 ++++++++++++++-------------- app/internal/query.py | 2 +- app/routers/wishlist.py | 4 ++ templates/wishlist_page/sources.html | 8 ++++ 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py index 219ffcb..294b4d6 100644 --- a/app/internal/mam/mam.py +++ b/app/internal/mam/mam.py @@ -68,46 +68,44 @@ async def query_mam( force_refresh: bool = False, ) -> dict[str, TorrentSource]: if not query: - return {} - - base_url = "https://www.myanonamouse.net" + return dict() + session_id = mam_config.get_session_id(session) assert session_id is not None - - if not force_refresh: - source_ttl = mam_config.get_source_ttl(session) - cached_sources = mam_source_cache.get(source_ttl, query) - if cached_sources: - return cached_sources + # TODO: Add cache, disabled because this cache is global so it would return prowlarr results. A dirty solution would be to just have it in the cache under "mam_" + querry. This would work. + # if not force_refresh: + # source_ttl = mam_config.get_source_ttl(session) + # cached_sources = mam_source_cache.get(source_ttl, query) + # if cached_sources: + # return cached_sources params: dict[str, Any] = { - "text": query, # book title + author(s) + "tor[text]": query, # book title + author(s) + + "tor[main_cat]": [13], + "tor[searchIn]": "torrents", + 'tor[srchIn][author]': 'true', + 'tor[srchIn][title]': 'true', + 'tor[searchType]': 'active', + "startNumber": 0, "perpage": 100, - "tor": { - "main_cat": {13}, # 13 is the audiobook category on mam - "searchIn": "torrents", - "searchType": "active", # retrieve only torrents with at least 1 seed. - "srchIn": { - "title": "true", - "author": "true", - }, - }, - "startNumber": 0 #offset } - + base_url = "https://www.myanonamouse.net" url = urljoin(base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}") logger.info("Querying Mam: %s", url) + print(url) async with ClientSession() as client_session: async with client_session.get( url, - cookies={"mam_id":mam_config.get_session_id} + cookies={"mam_id":mam_config.get_session_id(session)} ) as response: - search_results = await response.json() - sources : Dict[str,TorrentSource] = {} + search_results = await response.json() + # Storing in dict for faster retrieval by guid + sources : Dict[str,TorrentSource] = dict() - for result in search_results: + for result in search_results["data"]: # TODO reduce to just authors / narrator unless there is a use for the other data. sources.update({ f'https://www.myanonamouse.net/t/{result["id"]}': @@ -123,25 +121,28 @@ async def query_mam( info_url=f'https://www.myanonamouse.net/t/{result["id"]}', indexer_flags=["freeleech"] if result["personal_freeleech"]==1 else [], # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] publish_date=datetime.fromisoformat(result["added"]), - authors=list(json.load(result["author_info"]).values()), - narrators=list(json.load(result["narrator_info"]).values()) + authors=list(json.loads(result["author_info"]).values() ) if result["author_info"] else [], + narrators=list(json.loads(result["narrator_info"]).values()) if result["narrator_info"] else [] ) } ) - mam_source_cache.set(sources, query) + # mam_source_cache.set(sources, query) return sources def inject_mam_metadata(prowlarrData: list[ProwlarrSource], mamData: Dict[str,TorrentSource]) -> list[ProwlarrSource]: + print(mamData) for p in prowlarrData: m =mamData.get(p.guid) if m is None: + print("Not found: ", p.title, p.guid) continue p.authors= m.authors p.narrators = m.narrators + print(m.authors, m.narrators, p.title) return prowlarrData diff --git a/app/internal/query.py b/app/internal/query.py index fae200f..468e0c9 100644 --- a/app/internal/query.py +++ b/app/internal/query.py @@ -75,7 +75,7 @@ async def query_sources( query, force_refresh=force_refresh, ) - sources = inject_mam_metadata(sources,mam_sources) + sources = inject_mam_metadata(prowlarrData=sources,mamData=mam_sources) ranked = await rank_sources(session, client_session, sources, book) diff --git a/app/routers/wishlist.py b/app/routers/wishlist.py index db9510c..4c99f8f 100644 --- a/app/routers/wishlist.py +++ b/app/routers/wishlist.py @@ -26,6 +26,9 @@ from app.internal.prowlarr.prowlarr import ( prowlarr_config, start_download, ) +from app.internal.mam.mam import ( + mam_config +) from app.internal.query import query_sources from app.internal.ranking.quality import quality_config from app.util.auth import DetailedUser, get_authenticated_user @@ -199,6 +202,7 @@ async def list_sources( { "book": result.book, "sources": result.sources, + "mam_active": mam_config.is_active(session) }, ) diff --git a/templates/wishlist_page/sources.html b/templates/wishlist_page/sources.html index a2f22cf..2355648 100644 --- a/templates/wishlist_page/sources.html +++ b/templates/wishlist_page/sources.html @@ -28,6 +28,10 @@ title + {% if mam_active %} + author(s) + narrator(s) + {% endif %} indexer flags seed / leech (or grabs) @@ -46,6 +50,10 @@ {{ source.title }} + {% if mam_active %} + {{ source.authors }} + {{ source.narrators }} + {% endif %} {{ source.indexer }} {{ source.indexer_flags|join(', ') }} {% if source.protocol == "torrent" %} From 2df4b289ec6abc777e81447df66ae1c2c39b22a4 Mon Sep 17 00:00:00 2001 From: Leandro Zazzi Date: Tue, 11 Mar 2025 21:00:03 +0100 Subject: [PATCH 3/5] add mam_data to download_ranking --- app/internal/mam/mam.py | 3 +- app/internal/ranking/download_ranking.py | 58 +++++++++++++++--------- templates/wishlist_page/sources.html | 4 +- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py index 294b4d6..e61b44e 100644 --- a/app/internal/mam/mam.py +++ b/app/internal/mam/mam.py @@ -94,12 +94,11 @@ async def query_mam( url = urljoin(base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}") logger.info("Querying Mam: %s", url) - print(url) async with ClientSession() as client_session: async with client_session.get( url, - cookies={"mam_id":mam_config.get_session_id(session)} + cookies={"mam_id":session_id} ) as response: search_results = await response.json() # Storing in dict for faster retrieval by guid diff --git a/app/internal/ranking/download_ranking.py b/app/internal/ranking/download_ranking.py index ac17924..c80acf3 100644 --- a/app/internal/ranking/download_ranking.py +++ b/app/internal/ranking/download_ranking.py @@ -10,7 +10,7 @@ from sqlmodel import Session from app.internal.models import BookRequest, ProwlarrSource from app.internal.ranking.quality import quality_config from app.internal.ranking.quality_extract import Quality, extract_qualities - +from app.internal.mam.mam import mam_config class RankSource(pydantic.BaseModel): source: ProwlarrSource @@ -178,16 +178,22 @@ class CompareSource: return int(b_title) - int(a_title) def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int: - a_score = vaguely_exist_in_title( - self.book.authors, - a.source.title, - quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( - self.book.authors, - b.source.title, - quality_config.get_name_exists_ratio(self.session), - ) + if(mam_config.is_active(self.session) and (a.source.authors!=[] or b.source.authors!=[])): + a_score = get_intersection_length(a.source.authors, self.book.authors) + b_score = get_intersection_length(b.source.authors, self.book.authors) + else: + + + a_score = vaguely_exist_in_title( + self.book.authors, + a.source.title, + quality_config.get_name_exists_ratio(self.session), + ) + b_score = vaguely_exist_in_title( + self.book.authors, + b.source.title, + quality_config.get_name_exists_ratio(self.session), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -195,16 +201,20 @@ class CompareSource: def _compare_narrators( self, a: RankSource, b: RankSource, next_compare: int ) -> int: - a_score = vaguely_exist_in_title( - self.book.narrators, - a.source.title, - quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( - self.book.narrators, - b.source.title, - quality_config.get_name_exists_ratio(self.session), - ) + if(mam_config.is_active(self.session) and (a.source.narrators!=[] or b.source.narrators!=[])): + a_score = get_intersection_length(a.source.authors, self.book.authors) + b_score = get_intersection_length(b.source.authors, self.book.authors) + else: + a_score = vaguely_exist_in_title( + self.book.narrators, + a.source.title, + quality_config.get_name_exists_ratio(self.session), + ) + b_score = vaguely_exist_in_title( + self.book.narrators, + b.source.title, + quality_config.get_name_exists_ratio(self.session), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -224,7 +234,7 @@ class CompareSource: return int((a.source.publish_date - b.source.publish_date).total_seconds()) # With torrents: older => better return int((b.source.publish_date - a.source.publish_date).total_seconds()) - + def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) -> int: return sum( @@ -233,6 +243,10 @@ def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) if fuzz.token_set_ratio(w, title, processor=utils.default_process) > name_exists_ratio ) +def get_intersection_length(a : list[str],b: list[str]): + return len(set(a).intersection(set(b))) + + def exists_in_title(word: str, title: str, title_exists_ratio: int) -> bool: diff --git a/templates/wishlist_page/sources.html b/templates/wishlist_page/sources.html index 2355648..f2f0a91 100644 --- a/templates/wishlist_page/sources.html +++ b/templates/wishlist_page/sources.html @@ -51,8 +51,8 @@ {{ source.title }} {% if mam_active %} - {{ source.authors }} - {{ source.narrators }} + {{ source.authors|join(', ') }} + {{ source.narrators|join(', ') }} {% endif %} {{ source.indexer }} {{ source.indexer_flags|join(', ') }} From 127c9898377283226eb03bf2ec5fb0225d6a2ea7 Mon Sep 17 00:00:00 2001 From: Leandro Zazzi Date: Tue, 11 Mar 2025 21:13:22 +0100 Subject: [PATCH 4/5] Cache MAM requests --- app/internal/mam/mam.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py index e61b44e..1d70305 100644 --- a/app/internal/mam/mam.py +++ b/app/internal/mam/mam.py @@ -72,12 +72,12 @@ async def query_mam( session_id = mam_config.get_session_id(session) assert session_id is not None - # TODO: Add cache, disabled because this cache is global so it would return prowlarr results. A dirty solution would be to just have it in the cache under "mam_" + querry. This would work. - # if not force_refresh: - # source_ttl = mam_config.get_source_ttl(session) - # cached_sources = mam_source_cache.get(source_ttl, query) - # if cached_sources: - # return cached_sources + + if not force_refresh: + source_ttl = mam_config.get_source_ttl(session) + cached_sources = mam_source_cache.get(source_ttl,"mam_"+query) + if cached_sources: + return cached_sources params: dict[str, Any] = { "tor[text]": query, # book title + author(s) @@ -127,7 +127,7 @@ async def query_mam( ) - # mam_source_cache.set(sources, query) + mam_source_cache.set(sources, "mam_" + query) return sources From a8dfd0e7e69619a7d82f46acd60f929bde36d7e1 Mon Sep 17 00:00:00 2001 From: Leandro Zazzi Date: Tue, 11 Mar 2025 22:56:16 +0100 Subject: [PATCH 5/5] add fuzzy matching and other suggestions --- app/internal/indexers/mam.py | 154 +++++++++++++++++++++++ app/internal/mam/mam.py | 147 ---------------------- app/internal/query.py | 9 +- app/internal/ranking/download_ranking.py | 86 +++++++++---- app/routers/settings.py | 7 +- app/routers/wishlist.py | 6 +- 6 files changed, 226 insertions(+), 183 deletions(-) create mode 100644 app/internal/indexers/mam.py delete mode 100644 app/internal/mam/mam.py diff --git a/app/internal/indexers/mam.py b/app/internal/indexers/mam.py new file mode 100644 index 0000000..e71fdc4 --- /dev/null +++ b/app/internal/indexers/mam.py @@ -0,0 +1,154 @@ +import json +import logging +from datetime import datetime +from typing import Any, Literal, Optional +from urllib.parse import urlencode, urljoin + +from aiohttp import ClientSession +from sqlmodel import Session + +from app.internal.models import ( + TorrentSource, + ProwlarrSource, +) +from app.util.cache import SimpleCache, StringConfigCache + +logger = logging.getLogger(__name__) + + +class MamMisconfigured(ValueError): + pass + + +MamConfigKey = Literal["mam_session_id", "mam_source_ttl", "mam_active"] + + +class MamConfig(StringConfigCache[MamConfigKey]): + def raise_if_invalid(self, session: Session): + if not self.get_session_id(session): + raise MamMisconfigured("mam_id not set") + + def is_valid(self, session: Session) -> bool: + return ( + self.get_session_id(session) is not None + and self.get_session_id(session) != "" + ) + + def get_session_id(self, session: Session) -> Optional[str]: + return self.get(session, "mam_session_id") + + def set_mam_id(self, session: Session, mam_id: str): + self.set(session, "mam_session_id", mam_id) + + def get_source_ttl(self, session: Session) -> int: + return self.get_int(session, "mam_source_ttl", 24 * 60 * 60) + + def set_source_ttl(self, session: Session, source_ttl: int): + self.set_int(session, "mam_source_ttl", source_ttl) + + def is_active(self, session: Session) -> bool: + return self.get(session, "mam_active") == "True" + + def set_active(self, session: Session, state: bool): + self.set(session, "mam_active", str(state)) + + +mam_config = MamConfig() +mam_source_cache = SimpleCache[dict[str, TorrentSource]]() + + +def flush_Mam_cache(): + mam_source_cache.flush() + + +# Downloading is still handled via prowlarr. + + +async def query_mam( + session: Session, + client_session: ClientSession, + query: Optional[str], + force_refresh: bool = False, +) -> dict[str, TorrentSource]: + if not query: + return dict() + + session_id = mam_config.get_session_id(session) + assert session_id is not None + + if not force_refresh: + source_ttl = mam_config.get_source_ttl(session) + cached_sources = mam_source_cache.get(source_ttl, "mam_" + query) + if cached_sources: + return cached_sources + params: dict[str, Any] = { + "tor[text]": query, # book title + author(s) + "tor[main_cat]": [13], # MAM audiobook category + "tor[searchIn]": "torrents", + "tor[srchIn][author]": "true", + "tor[srchIn][title]": "true", + "tor[searchType]": "active", # only search for torrents with at least 1 seeder. + "startNumber": 0, + "perpage": 100, + } + + base_url = "https://www.myanonamouse.net" + url = urljoin( + base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}" + ) + + logger.info("Querying Mam: %s", url) + + async with client_session.get(url, cookies={"mam_id": session_id}) as response: + search_results = await response.json() + # Storing in dict for faster retrieval by guid + sources: dict[str, TorrentSource] = dict() + + for result in search_results["data"]: + # TODO reduce to just authors / narrator unless there is a use for the other data. + sources.update( + { + f'https://www.myanonamouse.net/t/{result["id"]}': TorrentSource( + protocol="torrent", + guid=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_id=-1, # We don't know MAM's id within prowlarr. + indexer="MyAnonamouse", + title=result["title"], + seeders=result.get("seeders", 0), + leechers=result.get("leechers", 0), + size=-1, + info_url=f'https://www.myanonamouse.net/t/{result["id"]}', + indexer_flags=( + ["freeleech"] if result["personal_freeleech"] == 1 else [] + ), # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] + publish_date=datetime.fromisoformat(result["added"]), + authors=( + list(json.loads(result["author_info"]).values()) + if result["author_info"] + else [] + ), + narrators=( + list(json.loads(result["narrator_info"]).values()) + if result["narrator_info"] + else [] + ), + ) + } + ) + + mam_source_cache.set(sources, "mam_" + query) + + return sources + + +def inject_mam_metadata( + prowlarrData: list[ProwlarrSource], mamData: dict[str, TorrentSource] +) -> list[ProwlarrSource]: + for p in prowlarrData: + m = mamData.get(p.guid) + if m is None: + continue + p.authors = m.authors + p.narrators = m.narrators + + return prowlarrData diff --git a/app/internal/mam/mam.py b/app/internal/mam/mam.py deleted file mode 100644 index 1d70305..0000000 --- a/app/internal/mam/mam.py +++ /dev/null @@ -1,147 +0,0 @@ -import json -import logging -from datetime import datetime -from typing import Any, Literal, Optional, Dict -from urllib.parse import urlencode, urljoin - -from aiohttp import ClientSession -from sqlmodel import Session - -from app.internal.models import ( - TorrentSource, - ProwlarrSource, -) -from app.util.cache import SimpleCache, StringConfigCache - -logger = logging.getLogger(__name__) - - -class MamMisconfigured(ValueError): - pass - - -MamConfigKey = Literal[ - "mam_session_id", - "mam_source_ttl", - "mam_active" -] - - -class MamConfig(StringConfigCache[MamConfigKey]): - def raise_if_invalid(self, session: Session): - if not self.get_session_id(session): - raise MamMisconfigured("mam_id not set") - - def is_valid(self, session: Session) -> bool: - return ( - self.get_session_id(session) is not None and self.get_session_id(session)!="" - ) - - def get_session_id(self, session: Session) -> Optional[str]: - return self.get(session, "mam_session_id") - - def set_mam_id(self, session: Session, mam_id: str): - self.set(session, "mam_session_id", mam_id) - def get_source_ttl(self, session: Session) -> int: - return self.get_int(session, "mam_source_ttl", 24 * 60 * 60) - - def set_source_ttl(self, session: Session, source_ttl: int): - self.set_int(session, "mam_source_ttl", source_ttl) - def is_active(self, session: Session) -> bool: - return self.get(session, "mam_active")=="True" - def set_active(self, session: Session, state: bool): - self.set(session, "mam_active", str(state)) - - -mam_config = MamConfig() -mam_source_cache = SimpleCache[dict[str, TorrentSource]]() - - -def flush_Mam_cache(): - mam_source_cache.flush() - -# Downloading is still handled via prowlarr. - -async def query_mam( - session: Session, - query: Optional[str], - force_refresh: bool = False, -) -> dict[str, TorrentSource]: - if not query: - return dict() - - session_id = mam_config.get_session_id(session) - assert session_id is not None - - if not force_refresh: - source_ttl = mam_config.get_source_ttl(session) - cached_sources = mam_source_cache.get(source_ttl,"mam_"+query) - if cached_sources: - return cached_sources - params: dict[str, Any] = { - "tor[text]": query, # book title + author(s) - - "tor[main_cat]": [13], - "tor[searchIn]": "torrents", - 'tor[srchIn][author]': 'true', - 'tor[srchIn][title]': 'true', - 'tor[searchType]': 'active', - "startNumber": 0, - "perpage": 100, - } - - base_url = "https://www.myanonamouse.net" - url = urljoin(base_url, f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}") - - logger.info("Querying Mam: %s", url) - async with ClientSession() as client_session: - - async with client_session.get( - url, - cookies={"mam_id":session_id} - ) as response: - search_results = await response.json() - # Storing in dict for faster retrieval by guid - sources : Dict[str,TorrentSource] = dict() - - for result in search_results["data"]: - # TODO reduce to just authors / narrator unless there is a use for the other data. - sources.update({ - f'https://www.myanonamouse.net/t/{result["id"]}': - TorrentSource( - protocol="torrent", - guid=f'https://www.myanonamouse.net/t/{result["id"]}', - indexer_id=-1, # We don't know MAM's id within prowlarr. - indexer="MyAnonamouse", - title=result["title"], - seeders=result.get("seeders", 0), - leechers=result.get("leechers", 0), - size=-1, - info_url=f'https://www.myanonamouse.net/t/{result["id"]}', - indexer_flags=["freeleech"] if result["personal_freeleech"]==1 else [], # TODO add differentiate between freeleech and VIP freeleech availible flags in result: [free, fl_vip, personal_freeleech] - publish_date=datetime.fromisoformat(result["added"]), - authors=list(json.loads(result["author_info"]).values() ) if result["author_info"] else [], - narrators=list(json.loads(result["narrator_info"]).values()) if result["narrator_info"] else [] - ) - } - ) - - - mam_source_cache.set(sources, "mam_" + query) - - return sources - - -def inject_mam_metadata(prowlarrData: list[ProwlarrSource], mamData: Dict[str,TorrentSource]) -> list[ProwlarrSource]: - print(mamData) - for p in prowlarrData: - m =mamData.get(p.guid) - if m is None: - print("Not found: ", p.title, p.guid) - continue - p.authors= m.authors - p.narrators = m.narrators - print(m.authors, m.narrators, p.title) - - return prowlarrData - diff --git a/app/internal/query.py b/app/internal/query.py index 468e0c9..4700f95 100644 --- a/app/internal/query.py +++ b/app/internal/query.py @@ -13,11 +13,7 @@ from app.internal.prowlarr.prowlarr import ( start_download, ) -from app.internal.mam.mam import ( - mam_config, - query_mam, - inject_mam_metadata -) +from app.internal.indexers.mam import mam_config, query_mam, inject_mam_metadata from app.internal.ranking.download_ranking import rank_sources querying: set[str] = set() @@ -72,10 +68,11 @@ async def query_sources( mam_sources = await query_mam( session, + client_session, query, force_refresh=force_refresh, ) - sources = inject_mam_metadata(prowlarrData=sources,mamData=mam_sources) + sources = inject_mam_metadata(prowlarrData=sources, mamData=mam_sources) ranked = await rank_sources(session, client_session, sources, book) diff --git a/app/internal/ranking/download_ranking.py b/app/internal/ranking/download_ranking.py index c80acf3..6cd81db 100644 --- a/app/internal/ranking/download_ranking.py +++ b/app/internal/ranking/download_ranking.py @@ -10,7 +10,7 @@ from sqlmodel import Session from app.internal.models import BookRequest, ProwlarrSource from app.internal.ranking.quality import quality_config from app.internal.ranking.quality_extract import Quality, extract_qualities -from app.internal.mam.mam import mam_config + class RankSource(pydantic.BaseModel): source: ProwlarrSource @@ -178,22 +178,30 @@ class CompareSource: return int(b_title) - int(a_title) def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int: - if(mam_config.is_active(self.session) and (a.source.authors!=[] or b.source.authors!=[])): - a_score = get_intersection_length(a.source.authors, self.book.authors) - b_score = get_intersection_length(b.source.authors, self.book.authors) - else: - - - a_score = vaguely_exist_in_title( + a_score = max( + vaguely_exist_in_title( self.book.authors, a.source.title, quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( + ), + fuzzy_author_narrator_match( + a.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), + ) + b_score = max( + vaguely_exist_in_title( self.book.authors, b.source.title, quality_config.get_name_exists_ratio(self.session), - ) + ), + fuzzy_author_narrator_match( + b.source.authors, + self.book.authors, + quality_config.get_name_exists_ratio(self.session), + ), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -201,20 +209,30 @@ class CompareSource: def _compare_narrators( self, a: RankSource, b: RankSource, next_compare: int ) -> int: - if(mam_config.is_active(self.session) and (a.source.narrators!=[] or b.source.narrators!=[])): - a_score = get_intersection_length(a.source.authors, self.book.authors) - b_score = get_intersection_length(b.source.authors, self.book.authors) - else: - a_score = vaguely_exist_in_title( + a_score = max( + vaguely_exist_in_title( self.book.narrators, a.source.title, quality_config.get_name_exists_ratio(self.session), - ) - b_score = vaguely_exist_in_title( + ), + fuzzy_author_narrator_match( + a.source.narrators, + self.book.narrators, + quality_config.get_name_exists_ratio(self.session), + ), + ) + b_score = max( + vaguely_exist_in_title( self.book.narrators, b.source.title, quality_config.get_name_exists_ratio(self.session), - ) + ), + fuzzy_author_narrator_match( + b.source.narrators, + self.book.narrators, + quality_config.get_name_exists_ratio(self.session), + ), + ) if a_score == b_score: return self._get_next_compare(next_compare)(a, b, next_compare + 1) return b_score - a_score @@ -234,7 +252,29 @@ class CompareSource: return int((a.source.publish_date - b.source.publish_date).total_seconds()) # With torrents: older => better return int((b.source.publish_date - a.source.publish_date).total_seconds()) - + + +def fuzzy_author_narrator_match( + source_people: list[str], book_people: list[str], name_exists_ratio: int +) -> int: + """Calculate a fuzzy matching score between two lists of author/narrator names.""" + if not source_people or not book_people: + return 0 + score = 0 + for book_person in book_people: + best_match = 0 + for source_person in source_people: + match_score = fuzz.token_set_ratio( + book_person, source_person, processor=utils.default_process + ) + best_match = max(best_match, match_score) + + # Only count matches above threshold + if best_match > name_exists_ratio: + score += 1 + + return score + def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) -> int: return sum( @@ -243,12 +283,12 @@ def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) if fuzz.token_set_ratio(w, title, processor=utils.default_process) > name_exists_ratio ) -def get_intersection_length(a : list[str],b: list[str]): + + +def get_intersection_length(a: list[str], b: list[str]): return len(set(a).intersection(set(b))) - - def exists_in_title(word: str, title: str, title_exists_ratio: int) -> bool: return ( fuzz.partial_ratio(word, title, processor=utils.default_process) diff --git a/app/routers/settings.py b/app/routers/settings.py index 807cd58..d82323b 100644 --- a/app/routers/settings.py +++ b/app/routers/settings.py @@ -10,7 +10,7 @@ from app.internal.models import EventEnum, GroupEnum, Notification, User from app.internal.prowlarr.indexer_categories import indexer_categories from app.internal.notifications import send_notification from app.internal.prowlarr.prowlarr import flush_prowlarr_cache, prowlarr_config -from app.internal.mam.mam import mam_config +from app.internal.indexers.mam import mam_config from app.internal.ranking.quality import IndexerFlag, QualityRange, quality_config from app.util.auth import ( @@ -282,7 +282,6 @@ def read_prowlarr( "prowlarr_misconfigured": True if prowlarr_misconfigured else False, "mam_active": mam_is_active, "mam_id": mam_id, - }, ) @@ -336,6 +335,7 @@ def update_indexer_categories( block_name="category", ) + @router.put("/mam/mam_id") def update_mam_id( mam_id: Annotated[str, Form()], @@ -347,6 +347,7 @@ def update_mam_id( mam_config.set_mam_id(session, mam_id) return Response(status_code=204, headers={"HX-Refresh": "true"}) + @router.put("/mam/activate") def activate_mam( session: Annotated[Session, Depends(get_session)], @@ -357,6 +358,7 @@ def activate_mam( mam_config.set_active(session, True) return Response(status_code=204, headers={"HX-Refresh": "true"}) + @router.put("/mam/deactivate") def deactivate_mam( session: Annotated[Session, Depends(get_session)], @@ -368,7 +370,6 @@ def deactivate_mam( return Response(status_code=204, headers={"HX-Refresh": "true"}) - @router.get("/download") def read_download( request: Request, diff --git a/app/routers/wishlist.py b/app/routers/wishlist.py index 4c99f8f..3d4eb91 100644 --- a/app/routers/wishlist.py +++ b/app/routers/wishlist.py @@ -26,9 +26,7 @@ from app.internal.prowlarr.prowlarr import ( prowlarr_config, start_download, ) -from app.internal.mam.mam import ( - mam_config -) +from app.internal.indexers.mam import mam_config from app.internal.query import query_sources from app.internal.ranking.quality import quality_config from app.util.auth import DetailedUser, get_authenticated_user @@ -202,7 +200,7 @@ async def list_sources( { "book": result.book, "sources": result.sources, - "mam_active": mam_config.is_active(session) + "mam_active": mam_config.is_active(session), }, )