From bbeb3e8e87bca3857cf28293a0db36009547cc7e Mon Sep 17 00:00:00 2001 From: Jakob Pinterits Date: Sat, 22 Feb 2025 21:10:28 +0100 Subject: [PATCH] rework URL normalization --- rio/app_server/abstract_app_server.py | 24 ++++-- rio/cli/run_project/uvicorn_worker.py | 8 +- rio/routing.py | 19 +++-- rio/session.py | 84 ++++++++++--------- .../pages/dashboard_page.py | 4 +- .../components/new_todo_item_input.py | 5 +- rio/utils.py | 70 ++++++++++------ tests/test_routing.py | 36 +++++++- 8 files changed, 155 insertions(+), 95 deletions(-) diff --git a/rio/app_server/abstract_app_server.py b/rio/app_server/abstract_app_server.py index 04464095..27822cb1 100644 --- a/rio/app_server/abstract_app_server.py +++ b/rio/app_server/abstract_app_server.py @@ -65,11 +65,7 @@ class AbstractAppServer(abc.ABC): # This is currently handled as follows: If a base URL is specified, it # must contain the full URL. If no base URL is specified, the URL # provided in the request is used instead. - if base_url is None: - self.base_url = None - else: - self.base_url = utils.normalize_url(base_url) - + self.base_url = base_url self._session_serve_tasks = dict[rio.Session, asyncio.Task[object]]() self._disconnected_sessions = dict[rio.Session, float]() @@ -248,7 +244,10 @@ class AbstractAppServer(abc.ABC): ## Raises - `NavigationFailed`: If a page guard crashes + `NavigationFailed`: If a page guard crashes. + + `NavigationFailed`: If the initial page URL is not a child of the app's + base URL. """ # Normalize and deduplicate the languages preferred_languages: list[str] = [] @@ -324,7 +323,7 @@ class AbstractAppServer(abc.ABC): # Prepare the initial URL. This will be exposed to the session as the # `active_page_url`, but overridden later once the page guards have been # run. - initial_page_url = rio.URL(initial_message.url.lower()) + initial_page_url = rio.URL(initial_message.url) # Determine the base URL. If one was explicitly provided when creating # the app server, use that. Otherwise derive one from the initial HTTP @@ -333,7 +332,6 @@ class AbstractAppServer(abc.ABC): base_url = ( initial_page_url.with_path("").with_query("").with_fragment("") ) - base_url = utils.normalize_url(base_url) else: base_url = self.base_url @@ -421,9 +419,17 @@ class AbstractAppServer(abc.ABC): active_page_url_absolute, ) = routing.check_page_guards(sess, initial_page_url) + # The initial page URL can be problematic. Logically it must obviously + # be a child of the app's base URL, but that may not be the case if a + # malicious URL was sent or the base misconfigured. Check for this. + if active_page_instances_and_path_arguments is None: + raise rio.NavigationFailed( + f"The initial page URL `{initial_page_url}` is not a child of the app's base URL `{self.base_url}`." + ) + # Is this a page, or a full URL to another site? try: - utils.make_url_relative( + utils.url_relative_to_base( sess._base_url, active_page_url_absolute, ) diff --git a/rio/cli/run_project/uvicorn_worker.py b/rio/cli/run_project/uvicorn_worker.py index 3302b166..7bf9edd0 100644 --- a/rio/cli/run_project/uvicorn_worker.py +++ b/rio/cli/run_project/uvicorn_worker.py @@ -11,7 +11,7 @@ import rio import rio.app_server.fastapi_server import rio.cli -from ... import nice_traceback, utils +from ... import nice_traceback from . import run_models @@ -140,17 +140,13 @@ class UvicornWorker: # Store the new app self.app_server = app_server + self.app_server.base_url = self.base_url self.app_server.debug_mode = self.debug_mode self.app_server.running_in_window = self.run_in_window self.app_server.internal_on_app_start = ( lambda: self.on_server_is_ready_or_failed.set_result(None) ) - if self.base_url is None: - self.app_server.base_url = None - else: - self.app_server.base_url = utils.normalize_url(self.base_url) - # There is no need to inject the new app or server anywhere. Since # uvicorn was fed a shim function instead of the app directly, any # requests will automatically be redirected to the new server instance. diff --git a/rio/routing.py b/rio/routing.py index 0f47f821..47a6293b 100644 --- a/rio/routing.py +++ b/rio/routing.py @@ -588,7 +588,8 @@ def check_page_guards( tuple[ tuple[ComponentPage, dict[str, object]], ..., - ], + ] + | None, rio.URL, ]: """ @@ -605,6 +606,10 @@ def check_page_guards( error. The result will still be valid. That is because navigation is possible, it's just that some `PageView`s will display a 404 page. + If attempting to navigate to a URL that isn't path of the app, i.e. it's not + a child of the app's configured base URL, `None` is returned instead of + active pages. + This function does not perform any actual navigation. It simply checks whether navigation to the target page is possible. @@ -627,11 +632,13 @@ def check_page_guards( past_redirects = [target_url_absolute] while True: - # TODO: What if the URL is not a child of the base URL? i.e. redirecting - # to a completely different site - target_url_relative = utils.make_url_relative( - sess._base_url, target_url_absolute - ) + # Strip the base. This also detects navigation to outside of the app + try: + target_url_relative = utils.url_relative_to_base( + sess._base_url, target_url_absolute + ) + except ValueError: + return None, target_url_absolute # Find all pages which would by activated by this navigation active_page_instances_and_path_arguments = tuple( diff --git a/rio/session.py b/rio/session.py index d0eaabe3..4701a29b 100644 --- a/rio/session.py +++ b/rio/session.py @@ -1008,35 +1008,12 @@ window.resizeTo(screen.availWidth, screen.availHeight); replaced with the new page. If `False`, a new history entry is created, allowing the user to go back to the previous page. """ - # Normalize the target URL - target_url = utils.normalize_url(target_url) + if isinstance(target_url, str): + target_url = rio.URL(target_url) # Determine the full page to navigate to target_url_absolute = self.active_page_url.join(target_url) - # Is this a page, or a full URL to another site? - try: - utils.make_url_relative( - self._base_url, - target_url_absolute, - ) - - # This is an external URL. Let the browser handle the navigation. - except ValueError: - logging.debug( - f"Navigating to external site `{target_url_absolute}`" - ) - - async def history_worker() -> None: - await self._evaluate_javascript( - f""" -window.location.href = {json.dumps(str(target_url))}; -""", - ) - - self.create_task(history_worker(), name="history worker") - return - # Is any guard opposed to this page? active_page_instances_and_path_arguments, active_page_url = ( routing.check_page_guards(self, target_url_absolute) @@ -1044,6 +1021,25 @@ window.location.href = {json.dumps(str(target_url))}; del target_url, target_url_absolute + # Is this one of the app's pages, or a full URL to another site? If + # navigating to another site, let the browser handle the navigation. + # + # TODO: What if running in a window? Navigating to another site doesn't + # make any sense in that case, since the window isn't supposed to be a + # browser. + if active_page_instances_and_path_arguments is None: + logging.debug(f"Navigating to external site `{active_page_url}`") + + async def history_worker() -> None: + await self._evaluate_javascript( + f""" +window.location.href = {json.dumps(str(active_page_url))}; +""", + ) + + self.create_task(history_worker(), name="history worker") + return + # Update the current page logging.debug(f"Navigating to page `{active_page_url}`") self._active_page_url = active_page_url @@ -3505,34 +3501,38 @@ a.remove(); @unicall.local(name="openUrl") async def _open_url(self, url: str) -> None: + # Case: Running as website + # + # If running in a browser, JS can take care of changing the URL or + # opening a new tab. The only reason why the frontend would tell us + # to open the url is because it's a local url. + # + # (Note: Of course an attacker could send us an external url to + # open, but `navigate_to` has to handle that gracefully anyway.) if self.running_as_website: - # If running in a browser, JS can take care of changing the URL or - # opening a new tab. The only reason why the frontend would tell us - # to open the url is because it's a local url. - # - # (Note: Of course an attacker could send us an external url to - # open, but `navigate_to` has to handle that gracefully anyway.) self.navigate_to(url) return - # Normalize the URL to make for easier comparisons - yarl_url = utils.normalize_url(url) + # Case: Running in a window. + yarl_url = rio.URL(url) del url # If running_in_window, local urls are *always* navigated to, even if # they're meant to be opened in a new tab. The `run_in_window` code # isn't designed to handle multiple sessions, so we can't open a new # tab or a 2nd window. - is_local_url = yarl_url.path.startswith(self._base_url.path) - - if is_local_url: - self.navigate_to(yarl_url) - return + try: + utils.url_relative_to_base(yarl_url, self._base_url) # And if it's an external url, it must be opened in a web browser. - import webbrowser + except ValueError: + import webbrowser - webbrowser.open(str(yarl_url)) + webbrowser.open(str(yarl_url)) + + # Local URL + else: + self.navigate_to(yarl_url) @unicall.local(name="ping") async def _ping(self, ping: str) -> str: @@ -3544,6 +3544,10 @@ a.remove(); Called by the client when the page changes. """ # Try to navigate to the new page + import revel + + revel.debug(f"Called navigate to {new_url}") + self.navigate_to( new_url, replace=True, diff --git a/rio/snippets/snippet-files/project-template-Crypto Dashboard/pages/dashboard_page.py b/rio/snippets/snippet-files/project-template-Crypto Dashboard/pages/dashboard_page.py index 9b99146f..97bcf632 100644 --- a/rio/snippets/snippet-files/project-template-Crypto Dashboard/pages/dashboard_page.py +++ b/rio/snippets/snippet-files/project-template-Crypto Dashboard/pages/dashboard_page.py @@ -1,4 +1,6 @@ # +# +from dataclasses import field from pathlib import Path import numpy as np @@ -10,8 +12,6 @@ import rio from .. import components as comps from .. import data_models -# - # @rio.page( diff --git a/rio/snippets/snippet-files/project-template-Todo App/components/new_todo_item_input.py b/rio/snippets/snippet-files/project-template-Todo App/components/new_todo_item_input.py index e31417ba..dc1b6393 100644 --- a/rio/snippets/snippet-files/project-template-Todo App/components/new_todo_item_input.py +++ b/rio/snippets/snippet-files/project-template-Todo App/components/new_todo_item_input.py @@ -3,12 +3,13 @@ from __future__ import annotations # import datetime +# +from dataclasses import field + import rio from ..data_models import TodoItem -# - # class NewTodoItemInput(rio.Component): diff --git a/rio/utils.py b/rio/utils.py index adc7fc33..66046e24 100644 --- a/rio/utils.py +++ b/rio/utils.py @@ -356,21 +356,28 @@ class FileInfo: raise ValueError("Invalid type. Expected 'r' or 'rb'.") -def make_url_relative(base: URL, other: URL) -> URL: +def url_relative_to_base(base: URL, other: URL) -> URL: """ Returns `other` as a relative URL to `base`. Raises a `ValueError` if - `other` is not a child of `base`. + `other` is not a child of `base`. This will never generate URLs containing + `..`. If those would be needed a `ValueError` is raised instead. - This will never generate URLs containing `..`. If those would be needed a - `ValueError` is raised instead. + ## Casing + + The scheme and host are case insensitive, as by the URL standard. The path + is case sensitive, but a warning will be logged should that be the only + reason two URLs are not considered equal. """ # Verify the URLs have the same scheme and host - if base.scheme != other.scheme: + if base.scheme.lower() != other.scheme.lower(): raise ValueError( f'URLs have different schemes: "{base.scheme}" and "{other.scheme}"' ) - if base.host != other.host: + base_host = None if base.host is None else base.host.lower() + other_host = None if other.host is None else other.host.lower() + + if base_host != other_host: raise ValueError( f'URLs have different hosts: "{base.host}" and "{other.host}"' ) @@ -387,8 +394,16 @@ def make_url_relative(base: URL, other: URL) -> URL: other_parts = other_parts[:-1] # See if the base is a parent of the other URL - if base_parts != other_parts[: len(base_parts)]: - raise ValueError(f'"{other}" is not a child of "{base}"') + for base_part, other_part in zip(base_parts, other_parts): + if base_part != other_part: + # Log a warning if the only difference is casing + if base_part.lower() == other_part.lower(): + message = f"`{base}` is not considered a base URL of `{other}`, because the URL paths are cased differently. This is considered a different, as by the URL specification. If you were trying to navigate to your own site and it didn't pick this up check your spelling." + raise ValueError(message) + + raise ValueError( + f"`{base}` is not a base URL of `{other}`. The paths differ at `{base_part}` and `{other_part}`" + ) # Remove the common parts from the URL other_parts = other_parts[len(base_parts) :] @@ -566,30 +581,33 @@ def safe_build( return placeholder_component -def normalize_url(url: str | rio.URL) -> rio.URL: - """ - Returns a normalized version of the given URL. +# def normalize_url(url: str | rio.URL) -> rio.URL: +# """ +# Returns a normalized version of the given URL. - This returns a new URL instance which is identical to the given URL, but - with the following guarantees: +# This returns a new URL instance which is identical to the given URL, but +# with the following guarantees: - - The origin is lowercase - - The URL has no trailing slashes - """ - url = rio.URL(url) +# - The protocol, host and path are lowercase +# - The URL has no trailing slashes - if url.scheme: - url = url.with_scheme(url.scheme.lower()) +# Query parameters and fragments are considered case-sensitive, and are not +# modified. +# """ +# url = rio.URL(url) - if url.host is not None: - url = url.with_host(url.host.lower()) +# if url.scheme: +# url = url.with_scheme(url.scheme.lower()) - if url.path != "/": - # Doing `url.with_path(url.path)` erases the query parameters. - # Unbelievable. - url = url.with_path(url.path.rstrip("/")).with_query(url.query) +# if url.host is not None: +# url = url.with_host(url.host.lower()) - return url +# if url.path != "/": +# # Doing `url.with_path(url.path)` erases the query parameters. +# # Unbelievable. +# url = url.with_path(url.path.rstrip("/").lower()).with_query(url.query) + +# return url def is_python_script(path: Path) -> bool: diff --git a/tests/test_routing.py b/tests/test_routing.py index dec08872..c87c704d 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -131,18 +131,18 @@ def test_redirects( # Create a fake session. It contains everything used by the routing system. app = rio.App(pages=PAGES) app_server = rio.app_server.TestingServer(app) - fake_session = app_server.create_dummy_session() + session = app_server.create_dummy_session() # Determine the final URL active_pages_and_path_arguments, absolute_url_after_redirects_is = ( rio.routing.check_page_guards( - fake_session, - fake_session._base_url.join(rio.URL(relative_url_before_redirects)), + session, + session._base_url.join(rio.URL(relative_url_before_redirects)), ) ) # Verify the final URL - absolute_url_after_redirects_should = fake_session._base_url.join( + absolute_url_after_redirects_should = session._base_url.join( rio.URL(relative_url_after_redirects_should) ) @@ -179,6 +179,34 @@ def test_url_parameter_parsing_failure() -> None: session._base_url.join(rio.URL("/3.5")), ) + assert isinstance(active_pages_and_path_arguments, list) assert len(active_pages_and_path_arguments) == 1 assert active_pages_and_path_arguments[0][0] == float_page assert active_pages_and_path_arguments[0][1] == {"path_param": 3.5} + + +def test_redirect_offsite() -> None: + """ + Redirect to a site other than this app. + """ + page = rio.ComponentPage( + name="Home", + url_segment="", + build=FakeComponent, + ) + + app = rio.App(pages=(page,)) + app_server = rio.app_server.TestingServer(app) + session = app_server.create_dummy_session() + + external_url = rio.URL("http://example.com") + + active_pages_and_path_arguments, absolute_url_after_redirects = ( + rio.routing.check_page_guards( + session, + external_url, + ) + ) + + assert active_pages_and_path_arguments is None + assert absolute_url_after_redirects == external_url