[WEB-4172] feat: Crawl work item links for title and favicon (#7117)

* feat: added a python bg task to crawl work item links for title and description

* fix: return meta_data in the response

* fix: add validation for accessing IP ranges

* fix: remove json.dumps

* fix: handle exception by returning None

* refactor: call find_favicon_url inside fetch_and_encode_favicon function

* chore: type hints

* fix: Handle None

* fix: remove print statementsg

* chore: added favicon and title of links

* fix: return null if no title found

* Update apiserver/plane/bgtasks/work_item_link_task.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* fix: remove exception handling

* fix: reduce timeout seconds

* fix: handle timeout exception

* fix: remove request timeout handling

* feat: add Link icon to issue detail links and update rendering logic

* fix: use logger for exception

---------

Co-authored-by: sangeethailango <sangeethailango21@gmail.com>
Co-authored-by: JayashTripathy <jayashtripathy371@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
sriram veeraghanta
2025-05-30 18:44:53 +05:30
committed by GitHub
parent 046a8a1bcf
commit 445c819fbd
3 changed files with 213 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ from plane.app.serializers import IssueLinkSerializer
from plane.app.permissions import ProjectEntityPermission
from plane.db.models import IssueLink
from plane.bgtasks.issue_activities_task import issue_activity
from plane.bgtasks.work_item_link_task import crawl_work_item_link_title
from plane.utils.host import base_host
@@ -44,6 +45,9 @@ class IssueLinkViewSet(BaseViewSet):
serializer = IssueLinkSerializer(data=request.data)
if serializer.is_valid():
serializer.save(project_id=project_id, issue_id=issue_id)
crawl_work_item_link_title(
serializer.data.get("id"), serializer.data.get("url")
)
issue_activity.delay(
type="link.activity.created",
requested_data=json.dumps(serializer.data, cls=DjangoJSONEncoder),
@@ -55,6 +59,10 @@ class IssueLinkViewSet(BaseViewSet):
notification=True,
origin=base_host(request=request, is_app=True),
)
issue_link = self.get_queryset().get(id=serializer.data.get("id"))
serializer = IssueLinkSerializer(issue_link)
return Response(serializer.data, status=status.HTTP_201_CREATED)
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
@@ -66,9 +74,14 @@ class IssueLinkViewSet(BaseViewSet):
current_instance = json.dumps(
IssueLinkSerializer(issue_link).data, cls=DjangoJSONEncoder
)
serializer = IssueLinkSerializer(issue_link, data=request.data, partial=True)
if serializer.is_valid():
serializer.save()
crawl_work_item_link_title(
serializer.data.get("id"), serializer.data.get("url")
)
issue_activity.delay(
type="link.activity.updated",
requested_data=requested_data,
@@ -80,6 +93,9 @@ class IssueLinkViewSet(BaseViewSet):
notification=True,
origin=base_host(request=request, is_app=True),
)
issue_link = self.get_queryset().get(id=serializer.data.get("id"))
serializer = IssueLinkSerializer(issue_link)
return Response(serializer.data, status=status.HTTP_200_OK)
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

View File

@@ -0,0 +1,185 @@
# Python imports
import logging
# Third party imports
from celery import shared_task
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import base64
import ipaddress
from typing import Dict, Any
from typing import Optional
from plane.db.models import IssueLink
from plane.utils.exception_logger import log_exception
logger = logging.getLogger("plane.worker")
DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501
@shared_task
def crawl_work_item_link_title(id: str, url: str) -> None:
meta_data = crawl_work_item_link_title_and_favicon(url)
issue_link = IssueLink.objects.get(id=id)
issue_link.metadata = meta_data
issue_link.save()
def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
"""
Crawls a URL to extract the title and favicon.
Args:
url (str): The URL to crawl
Returns:
str: JSON string containing title and base64-encoded favicon
"""
try:
# Prevent access to private IP ranges
parsed = urlparse(url)
try:
ip = ipaddress.ip_address(parsed.hostname)
if ip.is_private or ip.is_loopback or ip.is_reserved:
raise ValueError("Access to private/internal networks is not allowed")
except ValueError:
# Not an IP address, continue with domain validation
pass
# Set up headers to mimic a real browser
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa: E501
}
# Fetch the main page
response = requests.get(url, headers=headers, timeout=2)
response.raise_for_status()
# Parse HTML
soup = BeautifulSoup(response.content, "html.parser")
# Extract title
title_tag = soup.find("title")
title = title_tag.get_text().strip() if title_tag else None
# Fetch and encode favicon
favicon_base64 = fetch_and_encode_favicon(headers, soup, url)
# Prepare result
result = {
"title": title,
"favicon": favicon_base64["favicon_base64"],
"url": url,
"favicon_url": favicon_base64["favicon_url"],
}
return result
except requests.RequestException as e:
log_exception(e)
return {
"error": f"Request failed: {str(e)}",
"title": None,
"favicon": None,
"url": url,
}
except Exception as e:
log_exception(e)
return {
"error": f"Unexpected error: {str(e)}",
"title": None,
"favicon": None,
"url": url,
}
def find_favicon_url(soup: BeautifulSoup, base_url: str) -> Optional[str]:
"""
Find the favicon URL from HTML soup.
Args:
soup: BeautifulSoup object
base_url: Base URL for resolving relative paths
Returns:
str: Absolute URL to favicon or None
"""
# Look for various favicon link tags
favicon_selectors = [
'link[rel="icon"]',
'link[rel="shortcut icon"]',
'link[rel="apple-touch-icon"]',
'link[rel="apple-touch-icon-precomposed"]',
]
for selector in favicon_selectors:
favicon_tag = soup.select_one(selector)
if favicon_tag and favicon_tag.get("href"):
return urljoin(base_url, favicon_tag["href"])
# Fallback to /favicon.ico
parsed_url = urlparse(base_url)
fallback_url = f"{parsed_url.scheme}://{parsed_url.netloc}/favicon.ico"
# Check if fallback exists
try:
response = requests.head(fallback_url, timeout=2)
response.raise_for_status()
if response.status_code == 200:
return fallback_url
except requests.RequestException as e:
log_exception(e)
return None
return None
def fetch_and_encode_favicon(
headers: Dict[str, str], soup: BeautifulSoup, url: str
) -> Optional[Dict[str, str]]:
"""
Fetch favicon and encode it as base64.
Args:
favicon_url: URL to the favicon
headers: Request headers
Returns:
str: Base64 encoded favicon with data URI prefix or None
"""
try:
favicon_url = find_favicon_url(soup, url)
if favicon_url is None:
return {
"favicon_url": None,
"favicon_base64": f"data:image/svg+xml;base64,{DEFAULT_FAVICON}",
}
response = requests.get(favicon_url, headers=headers, timeout=2)
response.raise_for_status()
# Get content type
content_type = response.headers.get("content-type", "image/x-icon")
# Convert to base64
favicon_base64 = base64.b64encode(response.content).decode("utf-8")
# Return as data URI
return {
"favicon_url": favicon_url,
"favicon_base64": f"data:{content_type};base64,{favicon_base64}",
}
except Exception as e:
logger.warning(f"Failed to fetch favicon: {e}")
return {
"favicon_url": None,
"favicon_base64": f"data:image/svg+xml;base64,{DEFAULT_FAVICON}",
}

View File

@@ -2,7 +2,7 @@
import { FC } from "react";
import { observer } from "mobx-react";
import { Pencil, Trash2, Copy } from "lucide-react";
import { Pencil, Trash2, Copy, Link } from "lucide-react";
import { EIssueServiceType } from "@plane/constants";
import { useTranslation } from "@plane/i18n";
import { TIssueServiceType } from "@plane/types";
@@ -37,7 +37,9 @@ export const IssueLinkItem: FC<TIssueLinkItem> = observer((props) => {
const linkDetail = getLinkById(linkId);
if (!linkDetail) return <></>;
const Icon = getIconForLink(linkDetail.url);
// const Icon = getIconForLink(linkDetail.url);
const faviconUrl: string | undefined = linkDetail.metadata?.favicon;
const linkTitle: string | undefined = linkDetail.metadata?.title;
const toggleIssueLinkModal = (modalToggle: boolean) => {
toggleIssueLinkModalStore(modalToggle);
@@ -50,15 +52,21 @@ export const IssueLinkItem: FC<TIssueLinkItem> = observer((props) => {
className="group col-span-12 lg:col-span-6 xl:col-span-4 2xl:col-span-3 3xl:col-span-2 flex items-center justify-between gap-3 h-10 flex-shrink-0 px-3 bg-custom-background-90 hover:bg-custom-background-80 border-[0.5px] border-custom-border-200 rounded"
>
<div className="flex items-center gap-2.5 truncate flex-grow">
<Icon className="size-4 flex-shrink-0 stroke-2 text-custom-text-350 group-hover:text-custom-text-100" />
{faviconUrl ? (
<img src={faviconUrl} alt="favicon" className="size-4" />
) : (
<Link className="size-4 text-custom-text-350 group-hover:text-custom-text-100" />
)}
<Tooltip tooltipContent={linkDetail.url} isMobile={isMobile}>
<a
href={linkDetail.url}
target="_blank"
rel="noopener noreferrer"
className="truncate text-sm cursor-pointer flex-grow"
className="truncate text-sm cursor-pointer flex-grow flex items-center gap-3"
>
{linkDetail.title && linkDetail.title !== "" ? linkDetail.title : linkDetail.url}
{linkTitle && linkTitle !== "" && <span className="text-custom-text-400 text-xs">{linkTitle}</span>}
</a>
</Tooltip>
</div>