mirror of
https://github.com/rio-labs/rio.git
synced 2026-02-09 07:09:00 -06:00
rework URL normalization
This commit is contained in:
@@ -65,11 +65,7 @@ class AbstractAppServer(abc.ABC):
|
||||
# This is currently handled as follows: If a base URL is specified, it
|
||||
# must contain the full URL. If no base URL is specified, the URL
|
||||
# provided in the request is used instead.
|
||||
if base_url is None:
|
||||
self.base_url = None
|
||||
else:
|
||||
self.base_url = utils.normalize_url(base_url)
|
||||
|
||||
self.base_url = base_url
|
||||
self._session_serve_tasks = dict[rio.Session, asyncio.Task[object]]()
|
||||
self._disconnected_sessions = dict[rio.Session, float]()
|
||||
|
||||
@@ -248,7 +244,10 @@ class AbstractAppServer(abc.ABC):
|
||||
|
||||
## Raises
|
||||
|
||||
`NavigationFailed`: If a page guard crashes
|
||||
`NavigationFailed`: If a page guard crashes.
|
||||
|
||||
`NavigationFailed`: If the initial page URL is not a child of the app's
|
||||
base URL.
|
||||
"""
|
||||
# Normalize and deduplicate the languages
|
||||
preferred_languages: list[str] = []
|
||||
@@ -324,7 +323,7 @@ class AbstractAppServer(abc.ABC):
|
||||
# Prepare the initial URL. This will be exposed to the session as the
|
||||
# `active_page_url`, but overridden later once the page guards have been
|
||||
# run.
|
||||
initial_page_url = rio.URL(initial_message.url.lower())
|
||||
initial_page_url = rio.URL(initial_message.url)
|
||||
|
||||
# Determine the base URL. If one was explicitly provided when creating
|
||||
# the app server, use that. Otherwise derive one from the initial HTTP
|
||||
@@ -333,7 +332,6 @@ class AbstractAppServer(abc.ABC):
|
||||
base_url = (
|
||||
initial_page_url.with_path("").with_query("").with_fragment("")
|
||||
)
|
||||
base_url = utils.normalize_url(base_url)
|
||||
else:
|
||||
base_url = self.base_url
|
||||
|
||||
@@ -421,9 +419,17 @@ class AbstractAppServer(abc.ABC):
|
||||
active_page_url_absolute,
|
||||
) = routing.check_page_guards(sess, initial_page_url)
|
||||
|
||||
# The initial page URL can be problematic. Logically it must obviously
|
||||
# be a child of the app's base URL, but that may not be the case if a
|
||||
# malicious URL was sent or the base misconfigured. Check for this.
|
||||
if active_page_instances_and_path_arguments is None:
|
||||
raise rio.NavigationFailed(
|
||||
f"The initial page URL `{initial_page_url}` is not a child of the app's base URL `{self.base_url}`."
|
||||
)
|
||||
|
||||
# Is this a page, or a full URL to another site?
|
||||
try:
|
||||
utils.make_url_relative(
|
||||
utils.url_relative_to_base(
|
||||
sess._base_url,
|
||||
active_page_url_absolute,
|
||||
)
|
||||
|
||||
@@ -11,7 +11,7 @@ import rio
|
||||
import rio.app_server.fastapi_server
|
||||
import rio.cli
|
||||
|
||||
from ... import nice_traceback, utils
|
||||
from ... import nice_traceback
|
||||
from . import run_models
|
||||
|
||||
|
||||
@@ -140,17 +140,13 @@ class UvicornWorker:
|
||||
# Store the new app
|
||||
self.app_server = app_server
|
||||
|
||||
self.app_server.base_url = self.base_url
|
||||
self.app_server.debug_mode = self.debug_mode
|
||||
self.app_server.running_in_window = self.run_in_window
|
||||
self.app_server.internal_on_app_start = (
|
||||
lambda: self.on_server_is_ready_or_failed.set_result(None)
|
||||
)
|
||||
|
||||
if self.base_url is None:
|
||||
self.app_server.base_url = None
|
||||
else:
|
||||
self.app_server.base_url = utils.normalize_url(self.base_url)
|
||||
|
||||
# There is no need to inject the new app or server anywhere. Since
|
||||
# uvicorn was fed a shim function instead of the app directly, any
|
||||
# requests will automatically be redirected to the new server instance.
|
||||
|
||||
@@ -588,7 +588,8 @@ def check_page_guards(
|
||||
tuple[
|
||||
tuple[ComponentPage, dict[str, object]],
|
||||
...,
|
||||
],
|
||||
]
|
||||
| None,
|
||||
rio.URL,
|
||||
]:
|
||||
"""
|
||||
@@ -605,6 +606,10 @@ def check_page_guards(
|
||||
error. The result will still be valid. That is because navigation is
|
||||
possible, it's just that some `PageView`s will display a 404 page.
|
||||
|
||||
If attempting to navigate to a URL that isn't path of the app, i.e. it's not
|
||||
a child of the app's configured base URL, `None` is returned instead of
|
||||
active pages.
|
||||
|
||||
This function does not perform any actual navigation. It simply checks
|
||||
whether navigation to the target page is possible.
|
||||
|
||||
@@ -627,11 +632,13 @@ def check_page_guards(
|
||||
past_redirects = [target_url_absolute]
|
||||
|
||||
while True:
|
||||
# TODO: What if the URL is not a child of the base URL? i.e. redirecting
|
||||
# to a completely different site
|
||||
target_url_relative = utils.make_url_relative(
|
||||
sess._base_url, target_url_absolute
|
||||
)
|
||||
# Strip the base. This also detects navigation to outside of the app
|
||||
try:
|
||||
target_url_relative = utils.url_relative_to_base(
|
||||
sess._base_url, target_url_absolute
|
||||
)
|
||||
except ValueError:
|
||||
return None, target_url_absolute
|
||||
|
||||
# Find all pages which would by activated by this navigation
|
||||
active_page_instances_and_path_arguments = tuple(
|
||||
|
||||
@@ -1008,35 +1008,12 @@ window.resizeTo(screen.availWidth, screen.availHeight);
|
||||
replaced with the new page. If `False`, a new history entry is
|
||||
created, allowing the user to go back to the previous page.
|
||||
"""
|
||||
# Normalize the target URL
|
||||
target_url = utils.normalize_url(target_url)
|
||||
if isinstance(target_url, str):
|
||||
target_url = rio.URL(target_url)
|
||||
|
||||
# Determine the full page to navigate to
|
||||
target_url_absolute = self.active_page_url.join(target_url)
|
||||
|
||||
# Is this a page, or a full URL to another site?
|
||||
try:
|
||||
utils.make_url_relative(
|
||||
self._base_url,
|
||||
target_url_absolute,
|
||||
)
|
||||
|
||||
# This is an external URL. Let the browser handle the navigation.
|
||||
except ValueError:
|
||||
logging.debug(
|
||||
f"Navigating to external site `{target_url_absolute}`"
|
||||
)
|
||||
|
||||
async def history_worker() -> None:
|
||||
await self._evaluate_javascript(
|
||||
f"""
|
||||
window.location.href = {json.dumps(str(target_url))};
|
||||
""",
|
||||
)
|
||||
|
||||
self.create_task(history_worker(), name="history worker")
|
||||
return
|
||||
|
||||
# Is any guard opposed to this page?
|
||||
active_page_instances_and_path_arguments, active_page_url = (
|
||||
routing.check_page_guards(self, target_url_absolute)
|
||||
@@ -1044,6 +1021,25 @@ window.location.href = {json.dumps(str(target_url))};
|
||||
|
||||
del target_url, target_url_absolute
|
||||
|
||||
# Is this one of the app's pages, or a full URL to another site? If
|
||||
# navigating to another site, let the browser handle the navigation.
|
||||
#
|
||||
# TODO: What if running in a window? Navigating to another site doesn't
|
||||
# make any sense in that case, since the window isn't supposed to be a
|
||||
# browser.
|
||||
if active_page_instances_and_path_arguments is None:
|
||||
logging.debug(f"Navigating to external site `{active_page_url}`")
|
||||
|
||||
async def history_worker() -> None:
|
||||
await self._evaluate_javascript(
|
||||
f"""
|
||||
window.location.href = {json.dumps(str(active_page_url))};
|
||||
""",
|
||||
)
|
||||
|
||||
self.create_task(history_worker(), name="history worker")
|
||||
return
|
||||
|
||||
# Update the current page
|
||||
logging.debug(f"Navigating to page `{active_page_url}`")
|
||||
self._active_page_url = active_page_url
|
||||
@@ -3505,34 +3501,38 @@ a.remove();
|
||||
|
||||
@unicall.local(name="openUrl")
|
||||
async def _open_url(self, url: str) -> None:
|
||||
# Case: Running as website
|
||||
#
|
||||
# If running in a browser, JS can take care of changing the URL or
|
||||
# opening a new tab. The only reason why the frontend would tell us
|
||||
# to open the url is because it's a local url.
|
||||
#
|
||||
# (Note: Of course an attacker could send us an external url to
|
||||
# open, but `navigate_to` has to handle that gracefully anyway.)
|
||||
if self.running_as_website:
|
||||
# If running in a browser, JS can take care of changing the URL or
|
||||
# opening a new tab. The only reason why the frontend would tell us
|
||||
# to open the url is because it's a local url.
|
||||
#
|
||||
# (Note: Of course an attacker could send us an external url to
|
||||
# open, but `navigate_to` has to handle that gracefully anyway.)
|
||||
self.navigate_to(url)
|
||||
return
|
||||
|
||||
# Normalize the URL to make for easier comparisons
|
||||
yarl_url = utils.normalize_url(url)
|
||||
# Case: Running in a window.
|
||||
yarl_url = rio.URL(url)
|
||||
del url
|
||||
|
||||
# If running_in_window, local urls are *always* navigated to, even if
|
||||
# they're meant to be opened in a new tab. The `run_in_window` code
|
||||
# isn't designed to handle multiple sessions, so we can't open a new
|
||||
# tab or a 2nd window.
|
||||
is_local_url = yarl_url.path.startswith(self._base_url.path)
|
||||
|
||||
if is_local_url:
|
||||
self.navigate_to(yarl_url)
|
||||
return
|
||||
try:
|
||||
utils.url_relative_to_base(yarl_url, self._base_url)
|
||||
|
||||
# And if it's an external url, it must be opened in a web browser.
|
||||
import webbrowser
|
||||
except ValueError:
|
||||
import webbrowser
|
||||
|
||||
webbrowser.open(str(yarl_url))
|
||||
webbrowser.open(str(yarl_url))
|
||||
|
||||
# Local URL
|
||||
else:
|
||||
self.navigate_to(yarl_url)
|
||||
|
||||
@unicall.local(name="ping")
|
||||
async def _ping(self, ping: str) -> str:
|
||||
@@ -3544,6 +3544,10 @@ a.remove();
|
||||
Called by the client when the page changes.
|
||||
"""
|
||||
# Try to navigate to the new page
|
||||
import revel
|
||||
|
||||
revel.debug(f"Called navigate to {new_url}")
|
||||
|
||||
self.navigate_to(
|
||||
new_url,
|
||||
replace=True,
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
# <additional-imports>
|
||||
# </additional-imports>
|
||||
from dataclasses import field
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
@@ -10,8 +12,6 @@ import rio
|
||||
from .. import components as comps
|
||||
from .. import data_models
|
||||
|
||||
# </additional-imports>
|
||||
|
||||
|
||||
# <component>
|
||||
@rio.page(
|
||||
|
||||
@@ -3,12 +3,13 @@ from __future__ import annotations
|
||||
# <additional-imports>
|
||||
import datetime
|
||||
|
||||
# </additional-imports>
|
||||
from dataclasses import field
|
||||
|
||||
import rio
|
||||
|
||||
from ..data_models import TodoItem
|
||||
|
||||
# </additional-imports>
|
||||
|
||||
|
||||
# <component>
|
||||
class NewTodoItemInput(rio.Component):
|
||||
|
||||
70
rio/utils.py
70
rio/utils.py
@@ -356,21 +356,28 @@ class FileInfo:
|
||||
raise ValueError("Invalid type. Expected 'r' or 'rb'.")
|
||||
|
||||
|
||||
def make_url_relative(base: URL, other: URL) -> URL:
|
||||
def url_relative_to_base(base: URL, other: URL) -> URL:
|
||||
"""
|
||||
Returns `other` as a relative URL to `base`. Raises a `ValueError` if
|
||||
`other` is not a child of `base`.
|
||||
`other` is not a child of `base`. This will never generate URLs containing
|
||||
`..`. If those would be needed a `ValueError` is raised instead.
|
||||
|
||||
This will never generate URLs containing `..`. If those would be needed a
|
||||
`ValueError` is raised instead.
|
||||
## Casing
|
||||
|
||||
The scheme and host are case insensitive, as by the URL standard. The path
|
||||
is case sensitive, but a warning will be logged should that be the only
|
||||
reason two URLs are not considered equal.
|
||||
"""
|
||||
# Verify the URLs have the same scheme and host
|
||||
if base.scheme != other.scheme:
|
||||
if base.scheme.lower() != other.scheme.lower():
|
||||
raise ValueError(
|
||||
f'URLs have different schemes: "{base.scheme}" and "{other.scheme}"'
|
||||
)
|
||||
|
||||
if base.host != other.host:
|
||||
base_host = None if base.host is None else base.host.lower()
|
||||
other_host = None if other.host is None else other.host.lower()
|
||||
|
||||
if base_host != other_host:
|
||||
raise ValueError(
|
||||
f'URLs have different hosts: "{base.host}" and "{other.host}"'
|
||||
)
|
||||
@@ -387,8 +394,16 @@ def make_url_relative(base: URL, other: URL) -> URL:
|
||||
other_parts = other_parts[:-1]
|
||||
|
||||
# See if the base is a parent of the other URL
|
||||
if base_parts != other_parts[: len(base_parts)]:
|
||||
raise ValueError(f'"{other}" is not a child of "{base}"')
|
||||
for base_part, other_part in zip(base_parts, other_parts):
|
||||
if base_part != other_part:
|
||||
# Log a warning if the only difference is casing
|
||||
if base_part.lower() == other_part.lower():
|
||||
message = f"`{base}` is not considered a base URL of `{other}`, because the URL paths are cased differently. This is considered a different, as by the URL specification. If you were trying to navigate to your own site and it didn't pick this up check your spelling."
|
||||
raise ValueError(message)
|
||||
|
||||
raise ValueError(
|
||||
f"`{base}` is not a base URL of `{other}`. The paths differ at `{base_part}` and `{other_part}`"
|
||||
)
|
||||
|
||||
# Remove the common parts from the URL
|
||||
other_parts = other_parts[len(base_parts) :]
|
||||
@@ -566,30 +581,33 @@ def safe_build(
|
||||
return placeholder_component
|
||||
|
||||
|
||||
def normalize_url(url: str | rio.URL) -> rio.URL:
|
||||
"""
|
||||
Returns a normalized version of the given URL.
|
||||
# def normalize_url(url: str | rio.URL) -> rio.URL:
|
||||
# """
|
||||
# Returns a normalized version of the given URL.
|
||||
|
||||
This returns a new URL instance which is identical to the given URL, but
|
||||
with the following guarantees:
|
||||
# This returns a new URL instance which is identical to the given URL, but
|
||||
# with the following guarantees:
|
||||
|
||||
- The origin is lowercase
|
||||
- The URL has no trailing slashes
|
||||
"""
|
||||
url = rio.URL(url)
|
||||
# - The protocol, host and path are lowercase
|
||||
# - The URL has no trailing slashes
|
||||
|
||||
if url.scheme:
|
||||
url = url.with_scheme(url.scheme.lower())
|
||||
# Query parameters and fragments are considered case-sensitive, and are not
|
||||
# modified.
|
||||
# """
|
||||
# url = rio.URL(url)
|
||||
|
||||
if url.host is not None:
|
||||
url = url.with_host(url.host.lower())
|
||||
# if url.scheme:
|
||||
# url = url.with_scheme(url.scheme.lower())
|
||||
|
||||
if url.path != "/":
|
||||
# Doing `url.with_path(url.path)` erases the query parameters.
|
||||
# Unbelievable.
|
||||
url = url.with_path(url.path.rstrip("/")).with_query(url.query)
|
||||
# if url.host is not None:
|
||||
# url = url.with_host(url.host.lower())
|
||||
|
||||
return url
|
||||
# if url.path != "/":
|
||||
# # Doing `url.with_path(url.path)` erases the query parameters.
|
||||
# # Unbelievable.
|
||||
# url = url.with_path(url.path.rstrip("/").lower()).with_query(url.query)
|
||||
|
||||
# return url
|
||||
|
||||
|
||||
def is_python_script(path: Path) -> bool:
|
||||
|
||||
@@ -131,18 +131,18 @@ def test_redirects(
|
||||
# Create a fake session. It contains everything used by the routing system.
|
||||
app = rio.App(pages=PAGES)
|
||||
app_server = rio.app_server.TestingServer(app)
|
||||
fake_session = app_server.create_dummy_session()
|
||||
session = app_server.create_dummy_session()
|
||||
|
||||
# Determine the final URL
|
||||
active_pages_and_path_arguments, absolute_url_after_redirects_is = (
|
||||
rio.routing.check_page_guards(
|
||||
fake_session,
|
||||
fake_session._base_url.join(rio.URL(relative_url_before_redirects)),
|
||||
session,
|
||||
session._base_url.join(rio.URL(relative_url_before_redirects)),
|
||||
)
|
||||
)
|
||||
|
||||
# Verify the final URL
|
||||
absolute_url_after_redirects_should = fake_session._base_url.join(
|
||||
absolute_url_after_redirects_should = session._base_url.join(
|
||||
rio.URL(relative_url_after_redirects_should)
|
||||
)
|
||||
|
||||
@@ -179,6 +179,34 @@ def test_url_parameter_parsing_failure() -> None:
|
||||
session._base_url.join(rio.URL("/3.5")),
|
||||
)
|
||||
|
||||
assert isinstance(active_pages_and_path_arguments, list)
|
||||
assert len(active_pages_and_path_arguments) == 1
|
||||
assert active_pages_and_path_arguments[0][0] == float_page
|
||||
assert active_pages_and_path_arguments[0][1] == {"path_param": 3.5}
|
||||
|
||||
|
||||
def test_redirect_offsite() -> None:
|
||||
"""
|
||||
Redirect to a site other than this app.
|
||||
"""
|
||||
page = rio.ComponentPage(
|
||||
name="Home",
|
||||
url_segment="",
|
||||
build=FakeComponent,
|
||||
)
|
||||
|
||||
app = rio.App(pages=(page,))
|
||||
app_server = rio.app_server.TestingServer(app)
|
||||
session = app_server.create_dummy_session()
|
||||
|
||||
external_url = rio.URL("http://example.com")
|
||||
|
||||
active_pages_and_path_arguments, absolute_url_after_redirects = (
|
||||
rio.routing.check_page_guards(
|
||||
session,
|
||||
external_url,
|
||||
)
|
||||
)
|
||||
|
||||
assert active_pages_and_path_arguments is None
|
||||
assert absolute_url_after_redirects == external_url
|
||||
|
||||
Reference in New Issue
Block a user