mirror of
https://github.com/agregarr/agregarr.git
synced 2026-05-08 11:50:17 -05:00
fix(letterboxd): extract title from data-item-name instead of img alt (#405)
The previous regex extracted the title from `<img alt="...">`, which contains "Poster for [Title]" instead of the clean title.

- Use the `data-item-name` attribute, which holds the actual title
- Strip the year suffix, since the year is extracted separately from `data-item-full-display-name`

Co-authored-by: bitr8 <bitr8@users.noreply.github.com>
This commit is contained in:
@@ -723,7 +723,7 @@ export class LetterboxdCollectionSync extends BaseCollectionSync<'letterboxd'> {
|
||||
const filmIdRegex = /data-film-id="([^"]+)"/;
|
||||
const targetLinkRegex = /data-target-link="([^"]+)"/;
|
||||
const fullDisplayNameRegex = /data-item-full-display-name="([^"]+)"/;
|
||||
const titleRegex = /<img[^>]*alt="([^"]+)"/;
|
||||
const titleRegex = /data-item-name="([^"]+)"/;
|
||||
|
||||
let matches: RegExpMatchArray[] = [];
|
||||
let patternUsed = 0;
|
||||
@@ -776,6 +776,8 @@ export class LetterboxdCollectionSync extends BaseCollectionSync<'letterboxd'> {
|
||||
|
||||
// Decode HTML entities in the title
|
||||
let title = titleMatch[1];
|
||||
// Strip year suffix like "(2004)" since year is extracted separately
|
||||
title = title.replace(/\s*\(\d{4}\)$/, '');
|
||||
title = title
|
||||
.replace(/‎/g, '') // Remove left-to-right mark
|
||||
.replace(/‏/g, '') // Remove right-to-left mark
|
||||
|
||||
Reference in New Issue
Block a user