// Mirror of https://github.com/agregarr/agregarr.git
// Synced 2026-01-25 20:08:48 -06:00
// 350 lines, 9.0 KiB, TypeScript
import ExternalAPI from '@server/api/externalapi';
|
|
import cacheManager from '@server/lib/cache';
|
|
import logger from '@server/logger';
|
|
import { JSDOM } from 'jsdom';
|
|
|
|
/**
 * A single title scraped from an IMDb chart or user list.
 */
export interface ImdbListItem {
  /** IMDb title identifier in `tt<digits>` form, taken from a `/title/…` link. */
  imdbId: string;
  /** Display title as it appears in the scraped page (trimmed text content). */
  title: string;
  /** Four-digit release year, when one could be found next to the title. */
  year?: number;
  /** Media kind: fixed by the chart for top lists, inferred from text for custom lists. */
  type: 'movie' | 'tv';
  tmdbId?: number; // Will be resolved separately
}
|
|
|
|
/**
 * An IMDb list together with its scraped items.
 *
 * NOTE(review): nothing in this part of the file constructs or consumes this
 * interface; the field notes below are read off the names and types only and
 * should be confirmed against the callers.
 */
export interface ImdbList {
  /** List identifier (presumably the `ls<digits>` id from the list URL; confirm). */
  id: string;
  /** Human-readable list title. */
  title: string;
  /** Optional free-text description of the list. */
  description?: string;
  /** Titles contained in the list. */
  items: ImdbListItem[];
  /** Item count for the list (presumably may exceed `items.length`; confirm). */
  totalItems: number;
}
|
|
|
|
/**
 * Predefined IMDb chart pages that can be fetched without a list URL.
 *
 * The string values are stable identifiers; the chart path each member is
 * fetched from is decided in `ImdbAPI.getTopList`.
 */
export enum ImdbTopList {
  /** Fetched from `/chart/top/`. */
  TOP_250_MOVIES = 'top250movies',
  /** Fetched from `/chart/toptv/`. */
  TOP_250_TV = 'top250tv',
  /** Fetched from `/chart/moviemeter/`. */
  POPULAR_MOVIES = 'popularmovies',
  /** Fetched from `/chart/tvmeter/`. */
  POPULAR_TV = 'populartv',
  /** Fetched from `/chart/boxoffice/`. */
  MOST_POPULAR_MOVIES = 'mostpopularmovies',
  /** Fetched from `/chart/tvpopular/`. */
  MOST_POPULAR_TV = 'mostpopulartv',
}
|
|
|
|
/**
 * IMDb client for fetching chart pages and public user lists.
 *
 * Note: IMDb doesn't have a public API for lists, so this downloads the
 * regular HTML pages and scrapes them with JSDOM. The selectors try several
 * known page layouts in turn; this is a best-effort implementation.
 */
class ImdbAPI extends ExternalAPI {
  /**
   * Configure the base URL, browser-like request headers and the `imdb`
   * cache obtained from the cache manager.
   */
  constructor() {
    super('https://www.imdb.com', {
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        Accept:
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        Connection: 'keep-alive',
      },
      nodeCache: cacheManager.getCache('imdb').data,
    });
  }

  /**
   * Get a predefined IMDb top list.
   *
   * @param listType - Which chart to fetch.
   * @param limit - Maximum number of items to return (default 50).
   * @returns The scraped items, all typed according to the chart.
   * @throws Error prefixed with `Failed to fetch IMDb top list:` when the
   *   list type is unknown or the request/parsing fails. The original
   *   failure is logged before being rethrown.
   */
  public async getTopList(
    listType: ImdbTopList,
    limit = 50
  ): Promise<ImdbListItem[]> {
    try {
      let url: string;
      let expectedType: 'movie' | 'tv';

      switch (listType) {
        case ImdbTopList.TOP_250_MOVIES:
          url = '/chart/top/';
          expectedType = 'movie';
          break;
        case ImdbTopList.TOP_250_TV:
          url = '/chart/toptv/';
          expectedType = 'tv';
          break;
        case ImdbTopList.POPULAR_MOVIES:
          url = '/chart/moviemeter/';
          expectedType = 'movie';
          break;
        case ImdbTopList.POPULAR_TV:
          url = '/chart/tvmeter/';
          expectedType = 'tv';
          break;
        case ImdbTopList.MOST_POPULAR_MOVIES:
          url = '/chart/boxoffice/';
          expectedType = 'movie';
          break;
        case ImdbTopList.MOST_POPULAR_TV:
          url = '/chart/tvpopular/';
          expectedType = 'tv';
          break;
        default:
          throw new Error(`Unknown IMDb top list type: ${listType}`);
      }

      // NOTE(review): the third argument is presumably a cache TTL; confirm
      // against ExternalAPI.get.
      const html = await this.get<string>(url, undefined, 30000);
      return this.parseTopListHtml(html, expectedType, limit);
    } catch (error) {
      logger.error(`Failed to fetch IMDb top list ${listType}:`, {
        error: error instanceof Error ? error.message : 'Unknown error',
        listType,
        stack: error instanceof Error ? error.stack : undefined,
      });
      throw new Error(
        `Failed to fetch IMDb top list: ${
          error instanceof Error ? error.message : 'Unknown error'
        }`
      );
    }
  }

  /**
   * Get a custom IMDb list by URL.
   *
   * Only the `ls<digits>` id is taken from `listUrl`; the request itself is
   * always made against this client's base URL at `/list/<id>/`.
   *
   * @param listUrl - Any URL or path containing `/list/ls<digits>`.
   * @param limit - Maximum number of items to return (default 1000).
   * @returns The scraped items, with the media type inferred per item.
   * @throws Error prefixed with `Failed to fetch IMDb custom list:` when the
   *   URL has no list id or the request/parsing fails. The original failure
   *   is logged before being rethrown.
   */
  public async getCustomList(
    listUrl: string,
    limit = 1000
  ): Promise<ImdbListItem[]> {
    try {
      // Extract list ID from URL
      const listMatch = listUrl.match(/\/list\/(ls\d+)/);
      if (!listMatch) {
        throw new Error('Invalid IMDb list URL format');
      }

      const listId = listMatch[1];
      const url = `/list/${listId}/`;

      const html = await this.get<string>(url, undefined, 30000);
      return this.parseCustomListHtml(html, limit);
    } catch (error) {
      logger.error(`Failed to fetch IMDb custom list ${listUrl}:`, {
        error: error instanceof Error ? error.message : 'Unknown error',
        listUrl,
        stack: error instanceof Error ? error.stack : undefined,
      });
      throw new Error(
        `Failed to fetch IMDb custom list: ${
          error instanceof Error ? error.message : 'Unknown error'
        }`
      );
    }
  }

  /**
   * Parse HTML for top lists (Top 250, Popular, etc.).
   *
   * Tries each known item selector in order and uses the first one that
   * matches anything. Logs a warning and returns an empty array when none
   * of them match.
   *
   * @param html - Raw chart page HTML.
   * @param expectedType - Media type assigned to every item of this chart.
   * @param limit - Maximum number of items to return.
   */
  private parseTopListHtml(
    html: string,
    expectedType: 'movie' | 'tv',
    limit: number
  ): ImdbListItem[] {
    const dom = new JSDOM(html);

    try {
      const document = dom.window.document;

      // Different selectors for different chart types
      const itemSelectors = [
        '.cli-item', // Top 250 movies/TV
        '.titleColumn', // Some chart pages
        '.ipc-title-link-wrapper', // Newer layout
        '.titleListItem', // Fallback
      ];

      let itemElements: NodeListOf<Element> | null = null;

      for (const selector of itemSelectors) {
        itemElements = document.querySelectorAll(selector);
        if (itemElements.length > 0) break;
      }

      if (!itemElements || itemElements.length === 0) {
        logger.warn('No items found in IMDb top list HTML');
        return [];
      }

      return this.collectListItems(itemElements, limit, () => expectedType);
    } finally {
      // Everything returned is plain strings/numbers, so the DOM can be
      // released as soon as extraction is done.
      dom.window.close();
    }
  }

  /**
   * Parse HTML for custom user lists.
   *
   * Candidate elements are anything matching `.lister-item` or `.ipc-title`;
   * the media type of each item is inferred from the element's text.
   *
   * @param html - Raw list page HTML.
   * @param limit - Maximum number of items to return.
   */
  private parseCustomListHtml(html: string, limit: number): ImdbListItem[] {
    const dom = new JSDOM(html);

    try {
      const itemElements = dom.window.document.querySelectorAll(
        '.lister-item, .ipc-title'
      );

      return this.collectListItems(itemElements, limit, (element) =>
        this.inferType(element)
      );
    } finally {
      dom.window.close();
    }
  }

  /**
   * Turn candidate DOM elements into list items, in document order.
   *
   * Elements without both an IMDb id and a title are skipped and do not
   * count towards `limit`, so non-item matches (for example headings that
   * share a class with list rows) cannot shrink the result.
   *
   * @param elements - Candidate elements to inspect.
   * @param limit - Maximum number of items to return.
   * @param resolveType - Decides the media type of an accepted element.
   */
  private collectListItems(
    elements: ArrayLike<Element>,
    limit: number,
    resolveType: (element: Element) => 'movie' | 'tv'
  ): ImdbListItem[] {
    const items: ImdbListItem[] = [];

    for (const element of Array.from(elements)) {
      // Negated comparison so that a NaN limit yields no items, matching
      // the behaviour of an index loop bounded by Math.min(length, limit).
      if (!(items.length < limit)) break;

      const imdbId = this.extractImdbId(element);
      const title = this.extractTitle(element);
      if (!imdbId || !title) continue;

      items.push({
        imdbId,
        title,
        year: this.extractYear(element),
        type: resolveType(element),
      });
    }

    return items;
  }

  /**
   * Extract IMDb ID from an element.
   *
   * Looks for a `/title/…` link among the element's descendants first, then
   * on the element itself or its ancestors.
   *
   * @returns The `tt<digits>` id, or `null` when no such link is found.
   */
  private extractImdbId(element: Element): string | null {
    // Look for links with IMDb title patterns
    const linkSelectors = ['a[href*="/title/"]', '[href*="/title/"]'];

    for (const selector of linkSelectors) {
      const link = element.querySelector(selector) || element.closest(selector);
      if (link) {
        const href = link.getAttribute('href');
        const match = href?.match(/\/title\/(tt\d+)/);
        if (match) return match[1];
      }
    }

    return null;
  }

  /**
   * Extract title from an element.
   *
   * Selectors are tried from most to least specific; the first descendant
   * with non-blank text wins.
   *
   * @returns The trimmed title text, or `null` when nothing matched.
   */
  private extractTitle(element: Element): string | null {
    const titleSelectors = [
      '.cli-title a',
      '.titleColumn a',
      '.ipc-title__text',
      '.titleListItem .title a',
      'h3 a',
      '.title a',
      'a',
    ];

    for (const selector of titleSelectors) {
      const titleElement = element.querySelector(selector);
      if (titleElement?.textContent?.trim()) {
        return titleElement.textContent.trim();
      }
    }

    return null;
  }

  /**
   * Extract year from an element.
   *
   * @returns The first four-digit run found in the first matching metadata
   *   element that contains one, or `undefined` when there is none.
   */
  private extractYear(element: Element): number | undefined {
    const yearSelectors = [
      '.cli-title-metadata .cli-title-metadata-item:first-child',
      '.secondaryInfo',
      '.lister-item-year',
      '.year',
    ];

    for (const selector of yearSelectors) {
      const yearElement = element.querySelector(selector);
      if (yearElement?.textContent) {
        const yearMatch = yearElement.textContent.match(/(\d{4})/);
        if (yearMatch) {
          return parseInt(yearMatch[1], 10);
        }
      }
    }

    return undefined;
  }

  /**
   * Infer media type from element content.
   *
   * This is a plain substring heuristic over the element's lower-cased
   * text, so any element whose text mentions one of the keywords (including
   * inside a title or description) is classified as TV.
   */
  private inferType(element: Element): 'movie' | 'tv' {
    const text = element.textContent?.toLowerCase() || '';

    // Look for TV indicators
    if (
      text.includes('tv series') ||
      text.includes('tv mini series') ||
      text.includes('episode') ||
      text.includes('season')
    ) {
      return 'tv';
    }

    // Default to movie
    return 'movie';
  }

  /**
   * Validate if a URL is a valid IMDb list URL.
   *
   * The check is an unanchored pattern match for `imdb.com/list/ls<digits>`
   * anywhere in the string.
   */
  public static isValidListUrl(url: string): boolean {
    return /imdb\.com\/list\/ls\d+/.test(url);
  }

  /**
   * Get the predefined list label for display.
   *
   * @returns The label for the given list type, or `'IMDb List'` for any
   *   value that is not a known `ImdbTopList` member.
   */
  public static getTopListLabel(listType: ImdbTopList): string {
    switch (listType) {
      case ImdbTopList.TOP_250_MOVIES:
        return 'Top 250 Movies';
      case ImdbTopList.TOP_250_TV:
        return 'Top 250 TV Shows';
      case ImdbTopList.POPULAR_MOVIES:
        return 'Popular Movies';
      case ImdbTopList.POPULAR_TV:
        return 'Popular TV Shows';
      case ImdbTopList.MOST_POPULAR_MOVIES:
        return 'Most Popular Movies';
      case ImdbTopList.MOST_POPULAR_TV:
        return 'Most Popular TV Shows';
      default:
        return 'IMDb List';
    }
  }
}
|
|
|
|
export default ImdbAPI;
|