diff --git a/app/internal/indexers/abstract.py b/app/internal/indexers/abstract.py
new file mode 100644
index 0000000..1aef9ca
--- /dev/null
+++ b/app/internal/indexers/abstract.py
@@ -0,0 +1,59 @@
+from abc import ABC, abstractmethod
+from typing import Any, Generic, TypeVar
+
+from aiohttp import ClientSession
+from pydantic import BaseModel
+from sqlmodel import Session
+
+from app.internal.indexers.configuration import Configurations
+from app.internal.models import BookRequest, ProwlarrSource
+
+
+class SessionContainer(BaseModel, arbitrary_types_allowed=True):
+    session: Session
+    client_session: ClientSession
+
+
+T = TypeVar("T", bound=Configurations)
+
+
+class AbstractIndexer(ABC, Generic[T]):
+    name: str
+
+    @abstractmethod
+    async def setup(
+        self,
+        request: BookRequest,
+        container: SessionContainer,
+        configurations: Any,
+    ) -> None:
+        """
+        Called initially when a book request is made.
+        Can be used to set up initial settings required
+        for the indexer or if the indexer only supports
+        a general search feature, it can be executed in
+        this step.
+        """
+        pass
+
+    @staticmethod
+    @abstractmethod
+    async def get_configurations(
+        container: SessionContainer,
+    ) -> T:
+        """
+        Returns a list of configuration options that will be configurable on the frontend.
+        """
+        pass
+
+    @abstractmethod
+    async def is_matching_source(
+        self, source: ProwlarrSource, container: SessionContainer
+    ) -> bool:
+        pass
+
+    @abstractmethod
+    async def edit_source_metadata(
+        self, source: ProwlarrSource, container: SessionContainer
+    ) -> None:
+        pass
diff --git a/app/internal/indexers/configuration.py b/app/internal/indexers/configuration.py
new file mode 100644
index 0000000..6794de4
--- /dev/null
+++ b/app/internal/indexers/configuration.py
@@ -0,0 +1,116 @@
+import logging
+from typing import Any, Generic, Optional, TypeVar
+
+from pydantic import BaseModel
+from sqlmodel import Session
+
+from app.util.cache import StringConfigCache
+
+logger = logging.getLogger(__name__)
+
+
+T = TypeVar("T", str, int, bool, float, None)
+
+
+class IndexerConfiguration(BaseModel, Generic[T]):
+    display_name: str
+    description: Optional[str] = None
+    default: Optional[T] = None
+    required: bool = False
+    type: type[T]
+
+    def is_str(self) -> bool:
+        return self.type is str
+
+    def is_float(self) -> bool:
+        return self.type is float
+
+    def is_int(self) -> bool:
+        return self.type is int
+
+    def is_bool(self) -> bool:
+        return self.type is bool
+
+
+class Configurations(BaseModel):
+    """
+    The configurations to use for an indexer.
+    Any fields of type `IndexerConfiguration` will
+    be passed in as a `ValuedConfigurations` object
+    to the setup method of the indexer and input
+    fields will be generated for them on the frontend.
+    """
+
+    pass
+
+
+class ValuedConfigurations:
+    """
+    Field names need to be unique across all indexers
+    and match up with the fields of the `Configurations` object.
+    """
+
+    pass
+
+
+class ConfigurationException(ValueError):
+    pass
+
+
+class MissingRequiredException(ConfigurationException):
+    pass
+
+
+class InvalidTypeException(ConfigurationException):
+    pass
+
+
+indexer_configuration_cache = StringConfigCache[str]()
+
+
+def create_valued_configuration(
+    config: Configurations,
+    session: Session,
+    *,
+    check_required: bool = True,
+) -> ValuedConfigurations:
+    """
+    Using a configuration class, it retrieves the values from
+    the cache/db and handle setting the default values as well
+    as raising exceptions for required fields.
+    """
+
+    valued = ValuedConfigurations()
+
+    configurations = vars(config)
+    for key, _value in configurations.items():
+        if not isinstance(_value, IndexerConfiguration):
+            logger.debug("Skipping %s", key)
+            continue
+        value: IndexerConfiguration[Any] = _value
+
+        config_value = indexer_configuration_cache.get(session, key)
+        if config_value is None:
+            config_value = value.default
+
+        if check_required and value.required and config_value is None:
+            raise MissingRequiredException(f"Configuration {key} is required")
+
+        if config_value is None:
+            setattr(valued, key, None)
+        elif value.type is str:
+            setattr(valued, key, config_value)
+        elif value.type is int:
+            try:
+                setattr(valued, key, int(config_value))
+            except ValueError:
+                raise InvalidTypeException(f"Configuration {key} must be an integer")
+        elif value.type is float:
+            try:
+                setattr(valued, key, float(config_value))
+            except ValueError:
+                raise InvalidTypeException(f"Configuration {key} must be a float")
+        elif value.type is bool:
+            setattr(valued, key, bool(config_value))
+
+    return valued
diff --git a/app/internal/indexers/indexer_util.py b/app/internal/indexers/indexer_util.py
new file mode 100644
index 0000000..097bdc9
--- /dev/null
+++ b/app/internal/indexers/indexer_util.py
@@ -0,0 +1,50 @@
+import logging
+from typing import Any, cast
+
+from pydantic import BaseModel
+from app.internal.indexers.indexers import indexers
+from app.internal.indexers.abstract import AbstractIndexer, SessionContainer
+from app.internal.indexers.configuration import (
+    ConfigurationException,
+    IndexerConfiguration,
+    ValuedConfigurations,
+    create_valued_configuration,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class IndexerContext(BaseModel, arbitrary_types_allowed=True):
+    indexer: AbstractIndexer[Any]
+    configuration: dict[str, IndexerConfiguration[Any]]
+    valued: ValuedConfigurations
+
+
+async def get_indexer_contexts(
+    container: SessionContainer, *, check_required: bool = True
+) -> list[IndexerContext]:
+    contexts: list[IndexerContext] = []
+    for Indexer in indexers:
+        try:
+            configuration = await Indexer.get_configurations(container)
+            filtered_configuration: dict[str, IndexerConfiguration[Any]] = dict()
+            for k, v in cast(dict[str, Any], vars(configuration)).items():
+                if isinstance(v, IndexerConfiguration):
+                    filtered_configuration[k] = v
+
+            valued_configuration = create_valued_configuration(
+                configuration,
+                container.session,
+                check_required=check_required,
+            )
+            contexts.append(
+                IndexerContext(
+                    indexer=Indexer(),
+                    configuration=filtered_configuration,
+                    valued=valued_configuration,
+                )
+            )
+        except ConfigurationException as e:
+            logger.error("Failed to get configurations for %s: %s", Indexer.name, e)
+
+    return contexts
diff --git a/app/internal/indexers/indexers.py b/app/internal/indexers/indexers.py
new file mode 100644
index 0000000..a81e260
--- /dev/null
+++ b/app/internal/indexers/indexers.py
@@ -0,0 +1,8 @@
+from typing import Any
+from app.internal.indexers.abstract import AbstractIndexer
+from app.internal.indexers.mam import MamIndexer
+
+
+indexers: list[type[AbstractIndexer[Any]]] = [
+    MamIndexer,
+]
diff --git a/app/internal/indexers/mam.py b/app/internal/indexers/mam.py
new file mode 100644
index 0000000..100ffb3
--- /dev/null
+++ b/app/internal/indexers/mam.py
@@ -0,0 +1,137 @@
+import json
+import logging
+from typing import Any
+from urllib.parse import urlencode, urljoin
+
+from app.internal.indexers.abstract import (
+    AbstractIndexer,
+    SessionContainer,
+)
+from app.internal.indexers.configuration import (
+    Configurations,
+    IndexerConfiguration,
+    ValuedConfigurations,
+)
+from app.internal.models import (
+    BookRequest,
+    ProwlarrSource,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class MamConfigurations(Configurations):
+    mam_session_id: IndexerConfiguration[str] = IndexerConfiguration(
+        type=str,
+        display_name="MAM Session ID",
+        required=True,
+    )
+    mam_active: IndexerConfiguration[bool] = IndexerConfiguration(
+        type=bool,
+        display_name="MAM Active",
+        default=True,
+    )
+
+
+class ValuedMamConfigurations(ValuedConfigurations):
+    mam_session_id: str
+    mam_active: bool
+
+
+class MamIndexer(AbstractIndexer[MamConfigurations]):
+    name = "MyAnonamouse"
+    results: dict[str, dict[str, Any]] = dict()
+
+    @staticmethod
+    async def get_configurations(
+        container: SessionContainer,
+    ) -> MamConfigurations:
+        return MamConfigurations()
+
+    async def setup(
+        self,
+        request: BookRequest,
+        container: SessionContainer,
+        configurations: ValuedMamConfigurations,
+    ):
+        if not configurations.mam_active:
+            return
+
+        query = request.title + " " + " ".join(request.authors)
+
+        params: dict[str, Any] = {
+            "tor[text]": query,  # book title + author(s)
+            "tor[main_cat]": [13],  # MAM audiobook category
+            "tor[searchIn]": "torrents",
+            "tor[srchIn][author]": "true",
+            "tor[srchIn][title]": "true",
+            "tor[searchType]": "active",  # only search for torrents with at least 1 seeder.
+            "startNumber": 0,
+            "perpage": 100,
+        }
+
+        url = urljoin(
+            "https://www.myanonamouse.net",
+            f"/tor/js/loadSearchJSONbasic.php?{urlencode(params, doseq=True)}",
+        )
+
+        session_id = configurations.mam_session_id
+
+        logger.info("Mam: Querying: %s", url)
+
+        async with container.client_session.get(
+            url, cookies={"mam_id": session_id}
+        ) as response:
+            if not response.ok:
+                logger.error("Mam: Failed to query: %s", response.text)
+                return
+            search_results = await response.json()
+
+        for result in search_results["data"]:
+            self.results[str(result["id"])] = result
+        logger.info("Mam: Retrieved %d results", len(self.results))
+
+    async def is_matching_source(
+        self,
+        source: ProwlarrSource,
+        container: SessionContainer,
+    ):
+        return source.info_url is not None and source.info_url.startswith(
+            "https://www.myanonamouse.net/t/"
+        )
+
+    async def edit_source_metadata(
+        self,
+        source: ProwlarrSource,
+        container: SessionContainer,
+    ):
+        mam_id = source.guid.split("/")[-1]
+        result = self.results.get(mam_id)
+        if result is None:
+            return
+
+        # response type of authors and narrators is a stringified json object
+        source.book_metadata.authors = list(
+            json.loads(result.get("author_info", "{}")).values()
+        )
+
+        source.book_metadata.narrators = list(
+            json.loads(result.get("narrator_info", "{}")).values()
+        )
+
+        indexer_flags: set[str] = set(source.indexer_flags)
+        if result["personal_freeleech"] == 1:
+            indexer_flags.add("personal_freeleech")
+            indexer_flags.add("freeleech")
+        if result["free"] == 1:
+            indexer_flags.add("free")
+            indexer_flags.add("freeleech")
+        if result["fl_vip"] == 1:
+            indexer_flags.add("fl_vip")
+            indexer_flags.add("freeleech")
+        if result["vip"] == 1:
+            indexer_flags.add("vip")
+
+        source.indexer_flags = list(indexer_flags)
+
+        source.book_metadata.filetype = result["filetype"]
diff --git a/app/internal/models.py b/app/internal/models.py
index 5e8b233..d7b2636 100644
--- a/app/internal/models.py
+++ b/app/internal/models.py
@@ -133,6 +133,16 @@ class ManualBookRequest(BaseModel, table=True):
         arbitrary_types_allowed = True
 
 
+class BookMetadata(BaseModel):
+    """extra metadata that can be added to sources to better rank them"""
+
+    title: Optional[str] = None
+    subtitle: Optional[str] = None
+    authors: list[str] = []
+    narrators: list[str] = []
+    filetype: Optional[str] = None
+
+
 class BaseSource(BaseModel):
     guid: str
     indexer_id: int
@@ -145,6 +155,8 @@ class BaseSource(BaseModel):
     download_url: Optional[str] = None
     magnet_url: Optional[str] = None
 
+    book_metadata: BookMetadata = BookMetadata()
+
     @property
     def size_MB(self):
         return round(self.size / 1e6, 1)
diff --git a/app/internal/prowlarr/prowlarr.py b/app/internal/prowlarr/prowlarr.py
index 706dc11..65c5575 100644
--- a/app/internal/prowlarr/prowlarr.py
+++ b/app/internal/prowlarr/prowlarr.py
@@ -1,20 +1,23 @@
 import json
 import logging
-from datetime import datetime
 import posixpath
+from datetime import datetime
 from typing import Any, Literal, Optional
 from urllib.parse import urlencode
 
 from aiohttp import ClientResponse, ClientSession
 from sqlmodel import Session
 
+from app.internal.indexers.abstract import SessionContainer
 from app.internal.models import (
+    BookRequest,
     EventEnum,
     ProwlarrSource,
     TorrentSource,
     UsenetSource,
 )
 from app.internal.notifications import send_all_notifications
+from app.internal.prowlarr.source_metadata import edit_source_metadata
 from app.util.cache import SimpleCache, StringConfigCache
 
 logger = logging.getLogger(__name__)
@@ -127,12 +130,11 @@ async def start_download(
 async def query_prowlarr(
     session: Session,
     client_session: ClientSession,
-    query: Optional[str],
+    book_request: BookRequest,
     indexer_ids: Optional[list[int]] = None,
     force_refresh: bool = False,
 ) -> list[ProwlarrSource]:
-    if not query:
-        return []
+    query = book_request.title
 
     base_url = prowlarr_config.get_base_url(session)
     api_key = prowlarr_config.get_api_key(session)
@@ -217,6 +219,10 @@
         except KeyError as e:
             logger.error("Failed to parse source: %s. KeyError: %s", result, e)
 
+    # add additional metadata using any available indexers
+    container = SessionContainer(session=session, client_session=client_session)
+    await edit_source_metadata(book_request, sources, container)
+
     prowlarr_source_cache.set(sources, query)
     return sources
 
diff --git a/app/internal/prowlarr/source_metadata.py b/app/internal/prowlarr/source_metadata.py
new file mode 100644
index 0000000..79e528d
--- /dev/null
+++ b/app/internal/prowlarr/source_metadata.py
@@ -0,0 +1,39 @@
+import asyncio
+import logging
+from types import CoroutineType
+from typing import Any
+
+from app.internal.indexers.abstract import SessionContainer
+from app.internal.indexers.indexer_util import get_indexer_contexts
+from app.internal.models import BookRequest, ProwlarrSource
+
+logger = logging.getLogger(__name__)
+
+
+async def edit_source_metadata(
+    book_request: BookRequest,
+    sources: list[ProwlarrSource],
+    container: SessionContainer,
+):
+    contexts = await get_indexer_contexts(container)
+
+    coros = [
+        context.indexer.setup(book_request, container, context.valued)
+        for context in contexts
+    ]
+    exceptions = await asyncio.gather(*coros, return_exceptions=True)
+    for exc in exceptions:
+        if exc:
+            logger.error("Failed to setup indexer: %s", exc)
+
+    coros: list[CoroutineType[Any, Any, None]] = []
+    for source in sources:
+        for context in contexts:
+            if await context.indexer.is_matching_source(source, container):
+                coros.append(context.indexer.edit_source_metadata(source, container))
+                break
+
+    exceptions = await asyncio.gather(*coros, return_exceptions=True)
+    for exc in exceptions:
+        if exc:
+            logger.error("Failed to edit source metadata: %s", exc)
diff --git a/app/internal/query.py b/app/internal/query.py
index 89a9c98..b0648d7 100644
--- a/app/internal/query.py
+++ b/app/internal/query.py
@@ -53,15 +53,12 @@ async def query_sources(
     if not book:
         raise HTTPException(status_code=500, detail="Book asin error")
 
-    query = book.title + " " + " ".join(book.authors)
-
     sources = await query_prowlarr(
         session,
         client_session,
-        query,
+        book,
         force_refresh=force_refresh,
     )
-
     ranked = await rank_sources(session, client_session, sources, book)
 
     # start download if requested
diff --git a/app/internal/ranking/download_ranking.py b/app/internal/ranking/download_ranking.py
index ac17924..5ff9a89 100644
--- a/app/internal/ranking/download_ranking.py
+++ b/app/internal/ranking/download_ranking.py
@@ -178,15 +178,29 @@ class CompareSource:
         return int(b_title) - int(a_title)
 
     def _compare_authors(self, a: RankSource, b: RankSource, next_compare: int) -> int:
-        a_score = vaguely_exist_in_title(
-            self.book.authors,
-            a.source.title,
-            quality_config.get_name_exists_ratio(self.session),
+        a_score = max(
+            vaguely_exist_in_title(
+                self.book.authors,
+                a.source.title,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
+            fuzzy_author_narrator_match(
+                a.source.book_metadata.authors,
+                self.book.authors,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
         )
-        b_score = vaguely_exist_in_title(
-            self.book.authors,
-            b.source.title,
-            quality_config.get_name_exists_ratio(self.session),
+        b_score = max(
+            vaguely_exist_in_title(
+                self.book.authors,
+                b.source.title,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
+            fuzzy_author_narrator_match(
+                b.source.book_metadata.authors,
+                self.book.authors,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
         )
         if a_score == b_score:
             return self._get_next_compare(next_compare)(a, b, next_compare + 1)
@@ -195,15 +209,29 @@ class CompareSource:
     def _compare_narrators(
         self, a: RankSource, b: RankSource, next_compare: int
     ) -> int:
-        a_score = vaguely_exist_in_title(
-            self.book.narrators,
-            a.source.title,
-            quality_config.get_name_exists_ratio(self.session),
+        a_score = max(
+            vaguely_exist_in_title(
+                self.book.narrators,
+                a.source.title,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
+            fuzzy_author_narrator_match(
+                a.source.book_metadata.narrators,
+                self.book.narrators,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
         )
-        b_score = vaguely_exist_in_title(
-            self.book.narrators,
-            b.source.title,
-            quality_config.get_name_exists_ratio(self.session),
+        b_score = max(
+            vaguely_exist_in_title(
+                self.book.narrators,
+                b.source.title,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
+            fuzzy_author_narrator_match(
+                b.source.book_metadata.narrators,
+                self.book.narrators,
+                quality_config.get_name_exists_ratio(self.session),
+            ),
         )
         if a_score == b_score:
             return self._get_next_compare(next_compare)(a, b, next_compare + 1)
@@ -226,6 +254,28 @@ class CompareSource:
         return int((b.source.publish_date - a.source.publish_date).total_seconds())
 
 
+def fuzzy_author_narrator_match(
+    source_people: list[str], book_people: list[str], name_exists_ratio: int
+) -> int:
+    """Calculate a fuzzy matching score between two lists of author/narrator names."""
+    if not source_people or not book_people:
+        return 0
+    score = 0
+    for book_person in book_people:
+        best_match = 0
+        for source_person in source_people:
+            match_score = fuzz.token_set_ratio(
+                book_person, source_person, processor=utils.default_process
+            )
+            best_match = max(best_match, match_score)
+
+        # Only count matches above threshold
+        if best_match > name_exists_ratio:
+            score += 1
+
+    return score
+
+
 def vaguely_exist_in_title(words: list[str], title: str, name_exists_ratio: int) -> int:
     return sum(
         1
diff --git a/app/routers/settings.py b/app/routers/settings.py
index e558ec8..f551bcc 100644
--- a/app/routers/settings.py
+++ b/app/routers/settings.py
@@ -16,6 +16,9 @@ from app.internal.auth.authentication import (
 from app.internal.auth.config import LoginTypeEnum, auth_config
 from app.internal.auth.oidc_config import oidc_config
 from app.internal.env_settings import Settings
+from app.internal.indexers.abstract import SessionContainer
+from app.internal.indexers.configuration import indexer_configuration_cache
+from app.internal.indexers.indexer_util import IndexerContext, get_indexer_contexts
 from app.internal.models import EventEnum, GroupEnum, Notification, User
 from app.internal.notifications import send_notification
 from app.internal.prowlarr.indexer_categories import indexer_categories
@@ -703,3 +706,67 @@ async def update_security(
         block_name="form",
         headers={} if old == login_type else {"HX-Refresh": "true"},
     )
+
+
+@router.get("/indexers")
+async def read_indexers(
+    request: Request,
+    admin_user: Annotated[
+        DetailedUser, Depends(get_authenticated_user(GroupEnum.admin))
+    ],
+    session: Annotated[Session, Depends(get_session)],
+    client_session: Annotated[ClientSession, Depends(get_connection)],
+):
+    contexts = await get_indexer_contexts(
+        SessionContainer(session=session, client_session=client_session),
+        check_required=False,
+    )
+
+    return template_response(
+        "settings_page/indexers.html",
+        request,
+        admin_user,
+        {
+            "page": "indexers",
+            "indexers": contexts,
+            "version": Settings().app.version,
+        },
+    )
+
+
+@router.post("/indexers")
+async def update_indexers(
+    request: Request,
+    admin_user: Annotated[
+        DetailedUser, Depends(get_authenticated_user(GroupEnum.admin))
+    ],
+    indexer_select: Annotated[str, Form()],
+    session: Annotated[Session, Depends(get_session)],
+    client_session: Annotated[ClientSession, Depends(get_connection)],
+):
+    contexts = await get_indexer_contexts(
+        SessionContainer(session=session, client_session=client_session),
+        check_required=False,
+    )
+
+    updated_context: Optional[IndexerContext] = None
+    for context in contexts:
+        if context.indexer.name == indexer_select:
+            updated_context = context
+            break
+
+    if not updated_context:
+        raise ToastException("Indexer not found", "error")
+
+    form_values = await request.form()
+
+    for key, value in form_values.items():
+        if key in updated_context.configuration and type(value) is str:
+            if updated_context.configuration[key].type is bool:
+                indexer_configuration_cache.set(
+                    session, key, "true" if value == "on" else ""
+                )
+            else:
+                indexer_configuration_cache.set(session, key, str(value))
+
+    raise ToastException("Indexers updated", "success")
diff --git a/app/util/templates.py b/app/util/templates.py
index 45f6ba3..b2e5217 100644
--- a/app/util/templates.py
+++ b/app/util/templates.py
@@ -10,6 +10,8 @@ from app.internal.auth.authentication import DetailedUser
 
 templates = Jinja2Blocks(directory="templates")
 templates.env.filters["quote_plus"] = lambda u: quote_plus(u)  # pyright: ignore[reportUnknownLambdaType,reportUnknownMemberType,reportUnknownArgumentType]
 templates.env.filters["zfill"] = lambda val, num: str(val).zfill(num)  # pyright: ignore[reportUnknownLambdaType,reportUnknownMemberType,reportUnknownArgumentType]
+templates.env.globals["vars"] = vars  # pyright: ignore[reportUnknownMemberType]
+templates.env.globals["getattr"] = getattr  # pyright: ignore[reportUnknownMemberType]
 
 @overload
diff --git a/templates/scripts/alpinejs.html b/templates/scripts/alpinejs.html
new file mode 100644
index 0000000..2f63b12
--- /dev/null
+++ b/templates/scripts/alpinejs.html
@@ -0,0 +1,4 @@
+<script
+  src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
+  defer
+></script>
diff --git a/templates/settings_page/base.html b/templates/settings_page/base.html
index b3c8037..cbdf84a 100644
--- a/templates/settings_page/base.html
+++ b/templates/settings_page/base.html
@@ -55,6 +55,13 @@
       class="tab {% if page=='security' %}tab-active{% endif %}"
       >Security</a
     >
+    <a
+      href="/settings/indexers"
+      role="tab"
+      class="tab {% if page=='indexers' %}tab-active{% endif %}"
+      >Indexers</a
+    >
+
   {% endif %}
 </div>
 {% block content %} {% endblock %}
diff --git a/templates/settings_page/indexers.html b/templates/settings_page/indexers.html
new file mode 100644
index 0000000..b87ae02
--- /dev/null
+++ b/templates/settings_page/indexers.html
@@ -0,0 +1,83 @@
+{% extends "settings_page/base.html" %} {% block head %}