mirror of
https://github.com/agregarr/agregarr.git
synced 2026-04-29 22:49:27 -05:00
fix(imdb): add AWS WAF challenge solver, fixing all IMDb lists
Adds headless Chromium to solve the challenge. Recent changes to IMDb require this; otherwise all lists return zero items.
This commit is contained in:
+58
-45
@@ -6,8 +6,8 @@ ARG TARGETPLATFORM
|
||||
ENV TARGETPLATFORM=${TARGETPLATFORM:-linux/amd64}
|
||||
|
||||
RUN apk add --no-cache \
|
||||
python3 make g++ gcc libc6-compat bash \
|
||||
build-base cairo-dev pango-dev jpeg-dev giflib-dev pixman-dev
|
||||
python3 make g++ gcc libc6-compat bash \
|
||||
build-base cairo-dev pango-dev jpeg-dev giflib-dev pixman-dev
|
||||
|
||||
RUN yarn global add node-gyp
|
||||
|
||||
@@ -36,61 +36,74 @@ FROM node:20.19.5-alpine
|
||||
WORKDIR /app
|
||||
|
||||
RUN apk add --no-cache \
|
||||
tzdata tini fontconfig ttf-dejavu font-noto-emoji \
|
||||
cairo pango jpeg giflib pixman \
|
||||
ffmpeg python3 \
|
||||
&& mkdir -p /usr/share/fonts/truetype/poster-fonts && \
|
||||
cd /usr/share/fonts/truetype/poster-fonts && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bebasneue/BebasNeue-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/anton/Anton-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/creepster/Creepster-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bangers/Bangers-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/abrilfatface/AbrilFatface-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/lato/Lato-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/pacifico/Pacifico-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/greatvibes/GreatVibes-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/nosifer/Nosifer-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bungee/Bungee-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/pressstart2p/PressStart2P-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/courierprime/CourierPrime-Regular.ttf && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/oswald/Oswald[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/fredoka/Fredoka[wdth,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/playfairdisplay/PlayfairDisplay[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/montserrat/Montserrat[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/roboto/Roboto[wdth,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/inter/Inter[opsz,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/jetbrainsmono/JetBrainsMono[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/dancingscript/DancingScript[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/raleway/Raleway[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/orbitron/Orbitron[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/cinzel/Cinzel[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/cormorantgaramond/CormorantGaramond[wght].ttf" && \
|
||||
wget -q https://github.com/tonsky/FiraCode/releases/download/6.2/Fira_Code_v6.2.zip && \
|
||||
unzip -q Fira_Code_v6.2.zip && \
|
||||
mv ttf/FiraCode-Bold.ttf . && \
|
||||
rm -rf Fira_Code_v6.2.zip ttf/ woff/ woff2/ variable_ttf/ && \
|
||||
fc-cache -fv && \
|
||||
rm -rf /tmp/*
|
||||
tzdata tini fontconfig ttf-dejavu font-noto-emoji \
|
||||
cairo pango jpeg giflib pixman \
|
||||
ffmpeg python3 \
|
||||
&& mkdir -p /usr/share/fonts/truetype/poster-fonts && \
|
||||
cd /usr/share/fonts/truetype/poster-fonts && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bebasneue/BebasNeue-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/anton/Anton-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/creepster/Creepster-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bangers/Bangers-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/abrilfatface/AbrilFatface-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/lato/Lato-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/pacifico/Pacifico-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/greatvibes/GreatVibes-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/nosifer/Nosifer-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/bungee/Bungee-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/pressstart2p/PressStart2P-Regular.ttf && \
|
||||
wget -q https://raw.githubusercontent.com/google/fonts/main/ofl/courierprime/CourierPrime-Regular.ttf && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/oswald/Oswald[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/fredoka/Fredoka[wdth,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/playfairdisplay/PlayfairDisplay[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/montserrat/Montserrat[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/roboto/Roboto[wdth,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/inter/Inter[opsz,wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/jetbrainsmono/JetBrainsMono[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/dancingscript/DancingScript[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/raleway/Raleway[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/orbitron/Orbitron[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/cinzel/Cinzel[wght].ttf" && \
|
||||
wget -q "https://raw.githubusercontent.com/google/fonts/main/ofl/cormorantgaramond/CormorantGaramond[wght].ttf" && \
|
||||
wget -q https://github.com/tonsky/FiraCode/releases/download/6.2/Fira_Code_v6.2.zip && \
|
||||
unzip -q Fira_Code_v6.2.zip && \
|
||||
mv ttf/FiraCode-Bold.ttf . && \
|
||||
rm -rf Fira_Code_v6.2.zip ttf/ woff/ woff2/ variable_ttf/ && \
|
||||
fc-cache -fv && \
|
||||
rm -rf /tmp/*
|
||||
|
||||
# Configure fontconfig to scan custom fonts directory
|
||||
RUN mkdir -p /etc/fonts/conf.d && \
|
||||
echo '<?xml version="1.0"?>' > /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '<!DOCTYPE fontconfig SYSTEM "fonts.dtd">' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '<fontconfig>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo ' <dir>/app/config/fonts</dir>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '</fontconfig>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf
|
||||
echo '<?xml version="1.0"?>' > /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '<!DOCTYPE fontconfig SYSTEM "fonts.dtd">' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '<fontconfig>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo ' <dir>/app/config/fonts</dir>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf && \
|
||||
echo '</fontconfig>' >> /etc/fonts/conf.d/99-agregarr-custom-fonts.conf
|
||||
|
||||
# Install Deno - yt-dlp requires a JS runtime as of 2025-11-12
|
||||
RUN echo "@edge https://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories && \
|
||||
echo "@edge https://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \
|
||||
apk add --no-cache deno@edge
|
||||
echo "@edge https://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \
|
||||
apk add --no-cache deno@edge
|
||||
|
||||
# Install Chromium and dependencies for Playwright
|
||||
RUN apk add --no-cache \
|
||||
chromium \
|
||||
nss \
|
||||
freetype \
|
||||
harfbuzz \
|
||||
ca-certificates \
|
||||
ttf-freefont
|
||||
|
||||
# Install latest yt-dlp directly from GitHub releases (more up-to-date than apk package)
|
||||
RUN wget -q https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp && \
|
||||
chmod a+rx /usr/local/bin/yt-dlp
|
||||
chmod a+rx /usr/local/bin/yt-dlp
|
||||
|
||||
COPY --from=build_image /app ./
|
||||
|
||||
# Configure Playwright to use system Chromium
|
||||
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
||||
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
|
||||
ENTRYPOINT [ "/sbin/tini", "--" ]
|
||||
CMD [ "yarn", "start" ]
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
"ace-builds": "1.15.2",
|
||||
"archiver": "^7.0.1",
|
||||
"axios": "1.3.4",
|
||||
"axios-cookiejar-support": "^6.0.5",
|
||||
"axios-rate-limit": "1.3.0",
|
||||
"bcrypt": "5.1.0",
|
||||
"bowser": "2.11.0",
|
||||
@@ -80,6 +81,7 @@
|
||||
"nodemailer": "6.9.1",
|
||||
"openpgp": "5.7.0",
|
||||
"play-dl": "^1.9.7",
|
||||
"playwright": "^1.57.0",
|
||||
"plex-api": "5.3.2",
|
||||
"pug": "3.0.2",
|
||||
"react": "18.2.0",
|
||||
@@ -105,6 +107,7 @@
|
||||
"sqlite3": "5.1.4",
|
||||
"swagger-ui-express": "4.6.2",
|
||||
"swr": "2.0.4",
|
||||
"tough-cookie": "^6.0.0",
|
||||
"typeorm": "0.3.12",
|
||||
"ua-parser-js": "1.0.35",
|
||||
"vm2": "^3.10.0",
|
||||
|
||||
@@ -16,7 +16,6 @@ import {
|
||||
createSyncError,
|
||||
getCollectionSyncCounter,
|
||||
getMediaTypeFromLibrary,
|
||||
handleRateLimit,
|
||||
incrementCollectionSyncCounter,
|
||||
parseConfigIdFromLabel,
|
||||
processMissingItemsWithMode,
|
||||
@@ -238,11 +237,6 @@ export class MultiSourceOrchestrator {
|
||||
const source = sourcesToFetch[i];
|
||||
|
||||
try {
|
||||
// Apply rate limiting between source fetches
|
||||
if (i > 0) {
|
||||
await handleRateLimit(1, 'Multi-Source');
|
||||
}
|
||||
|
||||
const { items, missingItems } = await this.fetchItemsFromSource(
|
||||
source,
|
||||
config,
|
||||
|
||||
@@ -17,6 +17,7 @@ import type {
|
||||
PlexCollection,
|
||||
} from '@server/lib/collections/core/types';
|
||||
import { CollectionSyncErrorType } from '@server/lib/collections/core/types';
|
||||
import { ImdbAxiosClient } from '@server/lib/collections/utils/ImdbAxiosClient';
|
||||
import { RandomListManager } from '@server/lib/collections/utils/RandomListManager';
|
||||
import type { CollectionConfig } from '@server/lib/settings';
|
||||
import logger from '@server/logger';
|
||||
@@ -66,7 +67,7 @@ export class ImdbCollectionSync extends BaseCollectionSync<'imdb'> {
|
||||
);
|
||||
}
|
||||
|
||||
const axios = (await import('axios')).default;
|
||||
const axios = await ImdbAxiosClient.getInstance();
|
||||
|
||||
logger.debug(
|
||||
`Fetching IMDb custom list with __NEXT_DATA__ pagination: ${config.imdbCustomListUrl}`,
|
||||
@@ -94,10 +95,6 @@ export class ImdbCollectionSync extends BaseCollectionSync<'imdb'> {
|
||||
});
|
||||
|
||||
const response = await axios.get(pageUrl, {
|
||||
headers: {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
},
|
||||
timeout: 15000,
|
||||
});
|
||||
|
||||
@@ -194,7 +191,7 @@ export class ImdbCollectionSync extends BaseCollectionSync<'imdb'> {
|
||||
});
|
||||
|
||||
// Fetch all pages using __NEXT_DATA__ pagination (same as custom lists)
|
||||
const axios = (await import('axios')).default;
|
||||
const axios = await ImdbAxiosClient.getInstance();
|
||||
|
||||
let currentPage = 1;
|
||||
const maxPages = 50; // Safety limit
|
||||
@@ -213,10 +210,6 @@ export class ImdbCollectionSync extends BaseCollectionSync<'imdb'> {
|
||||
);
|
||||
|
||||
const response = await axios.get(pageUrl, {
|
||||
headers: {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
},
|
||||
timeout: 15000,
|
||||
});
|
||||
|
||||
@@ -289,17 +282,13 @@ export class ImdbCollectionSync extends BaseCollectionSync<'imdb'> {
|
||||
config.subtype || '',
|
||||
mediaType
|
||||
);
|
||||
const axios = (await import('axios')).default;
|
||||
const axios = await ImdbAxiosClient.getInstance();
|
||||
|
||||
// Fetching predefined IMDb list
|
||||
|
||||
const response = await axios.get(
|
||||
`https://www.imdb.com${predefinedUrl}`,
|
||||
{
|
||||
headers: {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
},
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
import logger from '@server/logger';
|
||||
import { chromium, type BrowserContext, type Cookie } from 'playwright';
|
||||
|
||||
/**
 * AWS WAF Token Solver
 *
 * Solves AWS WAF JavaScript challenges using a Playwright headless browser
 * to obtain the aws-waf-token cookie needed for IMDb requests.
 *
 * Solved cookies are cached per hostname and reused until the token cookie's
 * expiry, or for 5 minutes when the cookie carries no usable expiry.
 * Concurrent callers for the same hostname share one in-flight solve.
 */
export class AwsWafTokenSolver {
  /** Solved cookies keyed by hostname; `expiresAt` is in epoch milliseconds. */
  private static tokenCache: Map<
    string,
    { token: string; expiresAt: number; sessionCookies: Cookie[] }
  > = new Map();

  /** In-flight solves keyed by hostname, so parallel callers await the same promise. */
  private static solvingInProgress: Map<string, Promise<Cookie[]>> = new Map();

  /**
   * Get cookies for a domain, solving the WAF challenge if needed.
   *
   * @param url Absolute URL on the protected site; its hostname is the cache key.
   * @returns All cookies captured from the browser context after the solve.
   * @throws Error when the challenge cannot be solved or no aws-waf-token is issued.
   */
  static async getCookies(url: string): Promise<Cookie[]> {
    const domain = new URL(url).hostname;

    // Serve from cache while the token is still valid
    const cached = this.tokenCache.get(domain);
    if (cached && cached.expiresAt > Date.now()) {
      logger.debug('Using cached AWS WAF token', {
        label: 'AWS WAF Solver',
        domain,
        expiresIn: Math.round((cached.expiresAt - Date.now()) / 1000) + 's',
      });
      return cached.sessionCookies;
    }

    // Piggy-back on a solve that is already running for this domain
    const inProgress = this.solvingInProgress.get(domain);
    if (inProgress) {
      logger.debug('WAF challenge solve already in progress, waiting...', {
        label: 'AWS WAF Solver',
        domain,
      });
      return await inProgress;
    }

    // Start solving and publish the promise so other callers can wait on it
    const solvePromise = this.solveChallenge(url);
    this.solvingInProgress.set(domain, solvePromise);

    try {
      return await solvePromise;
    } finally {
      // Always unregister, whether the solve succeeded or failed
      this.solvingInProgress.delete(domain);
    }
  }

  /**
   * Solve the AWS WAF challenge using Playwright and cache the result.
   *
   * @param url Absolute URL to load in the headless browser.
   * @returns All cookies present in the browser context after the page settles.
   * @throws Error wrapping the underlying failure (launch, navigation, or a
   *   missing aws-waf-token cookie).
   */
  private static async solveChallenge(url: string): Promise<Cookie[]> {
    const domain = new URL(url).hostname;

    logger.info('Solving AWS WAF challenge for domain', {
      label: 'AWS WAF Solver',
      domain,
      url,
    });

    // Declared outside the try so the finally block can close the browser no
    // matter which step failed (including newContext(), before a context exists).
    let browser: Awaited<ReturnType<typeof chromium.launch>> | null = null;

    try {
      // Use system Chromium if configured (Docker/Alpine), otherwise use Playwright's
      const executablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;

      browser = await chromium.launch({
        headless: true,
        executablePath: executablePath || undefined,
        args: [
          '--disable-blink-features=AutomationControlled',
          '--disable-dev-shm-usage',
          '--no-sandbox',
          '--disable-setuid-sandbox',
        ],
      });

      const context: BrowserContext = await browser.newContext({
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        viewport: { width: 1920, height: 1080 },
        locale: 'en-US',
      });

      const page = await context.newPage();

      // Navigate to the URL
      logger.debug('Navigating to URL to trigger WAF challenge', {
        label: 'AWS WAF Solver',
        url,
      });

      const response = await page.goto(url, {
        waitUntil: 'networkidle',
        timeout: 30000,
      });

      const status = response?.status();
      logger.debug('Initial response received', {
        label: 'AWS WAF Solver',
        status,
      });

      // If we got a challenge (202), wait for it to complete
      if (status === 202) {
        logger.debug('AWS WAF challenge detected, waiting for completion...', {
          label: 'AWS WAF Solver',
        });

        // Wait for the challenge to complete (page will reload with 200)
        await page.waitForLoadState('networkidle', { timeout: 30000 });

        // Give it a moment to ensure cookies are set
        await page.waitForTimeout(1000);

        logger.debug('WAF challenge appears to be completed', {
          label: 'AWS WAF Solver',
        });
      }

      // Extract all cookies
      const cookies = await context.cookies();

      logger.debug('Extracted cookies from browser', {
        label: 'AWS WAF Solver',
        cookieCount: cookies.length,
        cookieNames: cookies.map((c) => c.name).join(', '),
      });

      // Find the aws-waf-token
      const wafToken = cookies.find((c) => c.name === 'aws-waf-token');

      if (!wafToken) {
        logger.warn('No aws-waf-token found after challenge completion', {
          label: 'AWS WAF Solver',
          availableCookies: cookies.map((c) => c.name),
        });
        throw new Error('Failed to obtain aws-waf-token');
      }

      logger.info('Successfully obtained AWS WAF token', {
        label: 'AWS WAF Solver',
        domain,
        tokenLength: wafToken.value.length,
      });

      // Cache until the cookie's own expiry (seconds -> ms). A non-positive
      // expiry (session cookies are reported as -1) carries no usable
      // timestamp, so fall back to a 5 minute lifetime instead of caching an
      // entry that is already expired.
      const expiresAt =
        wafToken.expires > 0
          ? wafToken.expires * 1000
          : Date.now() + 5 * 60 * 1000; // 5 minutes default

      this.tokenCache.set(domain, {
        token: wafToken.value,
        expiresAt,
        sessionCookies: cookies,
      });

      return cookies;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);

      logger.error('Failed to solve AWS WAF challenge', {
        label: 'AWS WAF Solver',
        domain,
        error: message,
      });

      throw new Error(
        `Failed to solve AWS WAF challenge for ${domain}: ${message}`
      );
    } finally {
      if (browser) {
        try {
          await browser.close();
        } catch (closeError) {
          // A failed shutdown must not mask the solve result or the original error
          logger.debug('Failed to close browser after WAF challenge', {
            label: 'AWS WAF Solver',
            error:
              closeError instanceof Error
                ? closeError.message
                : String(closeError),
          });
        }
      }
    }
  }

  /**
   * Clear cached token for a domain.
   *
   * @param domain Hostname whose entry should be dropped; when omitted, every
   *   cached entry is removed.
   */
  static clearCache(domain?: string): void {
    if (domain) {
      this.tokenCache.delete(domain);
      logger.debug('Cleared cached token for domain', {
        label: 'AWS WAF Solver',
        domain,
      });
    } else {
      this.tokenCache.clear();
      logger.debug('Cleared all cached tokens', {
        label: 'AWS WAF Solver',
      });
    }
  }
}
|
||||
@@ -0,0 +1,189 @@
|
||||
import logger from '@server/logger';
|
||||
import axios, { type AxiosInstance } from 'axios';
|
||||
import { wrapper } from 'axios-cookiejar-support';
|
||||
import { CookieJar } from 'tough-cookie';
|
||||
import { AwsWafTokenSolver } from './AwsWafTokenSolver';
|
||||
|
||||
// Extend axios config to include our retry flag.
// Module augmentation: adds an optional `_wafRetry` marker to axios'
// InternalAxiosRequestConfig. The response interceptor in ImdbAxiosClient
// sets it before re-issuing a request and checks it on the way back, so a
// request that was already retried after a WAF solve is not retried again.
|
||||
declare module 'axios' {
|
||||
export interface InternalAxiosRequestConfig {
|
||||
_wafRetry?: boolean;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * IMDb Axios Client with Cookie Persistence
 *
 * Provides a shared axios instance with cookie jar support for IMDb requests.
 * Automatically handles AWS WAF challenges: an HTTP 202 response from an
 * imdb.com URL triggers a challenge solve, the resulting cookies are stored
 * in the jar, and the request is retried once.
 */
export class ImdbAxiosClient {
  private static instance: AxiosInstance | null = null;
  private static cookieJar: CookieJar | null = null;
  private static isInitialized = false;

  /**
   * Get the shared axios instance with cookie jar.
   *
   * @returns The lazily created, process-wide axios instance.
   * @throws Error if the instance is still missing after initialization.
   */
  static async getInstance(): Promise<AxiosInstance> {
    if (!this.isInitialized) {
      await this.initialize();
    }

    if (!this.instance) {
      throw new Error('Failed to initialize IMDb axios client');
    }

    return this.instance;
  }

  /**
   * Copy browser cookies into a tough-cookie jar as `name=value` pairs.
   *
   * @param jar Destination cookie jar.
   * @param cookies Cookies returned by the WAF solver.
   * @param url URL the cookies are stored against.
   */
  private static async storeCookies(
    jar: CookieJar,
    cookies: ReadonlyArray<{ name: string; value: string }>,
    url: string
  ): Promise<void> {
    for (const cookie of cookies) {
      await jar.setCookie(`${cookie.name}=${cookie.value}`, url);
    }
  }

  /**
   * Initialize the axios instance with cookie jar and WAF interceptor.
   * Does nothing when the client is already initialized.
   */
  private static async initialize(): Promise<void> {
    if (this.isInitialized) {
      return;
    }

    logger.debug('Initializing IMDb axios client with cookie jar', {
      label: 'IMDb Axios Client',
    });

    // Reuse a jar that warmup() may already have populated; creating a new
    // one unconditionally would throw those pre-fetched cookies away.
    const jar = this.cookieJar ?? new CookieJar();
    this.cookieJar = jar;

    // Create axios instance with cookie jar support
    const client = wrapper(
      axios.create({
        jar,
        withCredentials: true,
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          Accept:
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
          'Accept-Language': 'en-US,en;q=0.9',
          'Accept-Encoding': 'gzip, deflate, br',
          'Cache-Control': 'max-age=0',
          Connection: 'keep-alive',
        },
      })
    );

    // Add response interceptor to handle WAF challenges
    // CRITICAL: 202 is a SUCCESS status, so we check in the success handler!
    client.interceptors.response.use(
      async (response) => {
        const requestUrl = response.config.url;

        // Check if this is a WAF challenge (HTTP 202) that was not retried yet
        if (
          response.status === 202 &&
          !response.config._wafRetry &&
          requestUrl?.includes('imdb.com')
        ) {
          logger.warn('AWS WAF challenge detected (HTTP 202), solving...', {
            label: 'IMDb Axios Client',
            url: requestUrl,
          });

          try {
            // Solve the challenge
            const cookies = await AwsWafTokenSolver.getCookies(requestUrl);

            // Store the cookies in the jar this client actually sends from,
            // even if reset() has replaced the static jar in the meantime
            await this.storeCookies(jar, cookies, requestUrl);

            // Mark this request as retried to avoid infinite loops
            response.config._wafRetry = true;

            // Retry the request with cookies
            logger.debug('Retrying request with WAF token', {
              label: 'IMDb Axios Client',
              url: requestUrl,
            });

            return client.request(response.config);
          } catch (wafError) {
            logger.error('Failed to solve WAF challenge', {
              label: 'IMDb Axios Client',
              error:
                wafError instanceof Error ? wafError.message : String(wafError),
            });
            throw wafError;
          }
        }

        return response;
      },
      async (error) => {
        // Still handle actual errors (4xx, 5xx)
        return Promise.reject(error);
      }
    );

    this.instance = client;
    this.isInitialized = true;

    logger.info('IMDb axios client initialized successfully', {
      label: 'IMDb Axios Client',
    });
  }

  /**
   * Pre-fetch WAF token (optional - useful for warming up).
   *
   * Failures are logged and swallowed so that warming up stays best-effort.
   */
  static async warmup(): Promise<void> {
    logger.debug('Warming up IMDb client with WAF token', {
      label: 'IMDb Axios Client',
    });

    try {
      const cookies = await AwsWafTokenSolver.getCookies(
        'https://www.imdb.com/chart/top/'
      );

      // Create the jar on demand; initialize() picks it up later
      const jar = this.cookieJar ?? new CookieJar();
      this.cookieJar = jar;

      await this.storeCookies(jar, cookies, 'https://www.imdb.com');

      logger.info('IMDb client warmed up successfully', {
        label: 'IMDb Axios Client',
        cookieCount: cookies.length,
      });
    } catch (error) {
      logger.error('Failed to warm up IMDb client', {
        label: 'IMDb Axios Client',
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /**
   * Reset the client: drops the instance and cookie jar and clears the cached
   * WAF token for www.imdb.com, so the next getInstance() call starts fresh.
   */
  static reset(): void {
    this.isInitialized = false;
    this.instance = null;
    this.cookieJar = null;
    AwsWafTokenSolver.clearCache('www.imdb.com');

    logger.debug('IMDb axios client reset', {
      label: 'IMDb Axios Client',
    });
  }
}
|
||||
@@ -4615,7 +4615,7 @@ agent-base@6, agent-base@^6.0.2:
|
||||
dependencies:
|
||||
debug "4"
|
||||
|
||||
agent-base@^7.1.0, agent-base@^7.1.2:
|
||||
agent-base@^7.1.0, agent-base@^7.1.2, agent-base@^7.1.4:
|
||||
version "7.1.4"
|
||||
resolved "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz"
|
||||
integrity sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==
|
||||
@@ -5027,6 +5027,13 @@ axe-core@^4.6.2:
|
||||
resolved "https://registry.npmjs.org/axe-core/-/axe-core-4.6.3.tgz"
|
||||
integrity sha512-/BQzOX780JhsxDnPpH4ZiyrJAzcd8AfzFPkv+89veFSr1rcMjuq2JDCwypKaPeB6ljHp9KjXhPpjgCvQlWYuqg==
|
||||
|
||||
axios-cookiejar-support@^6.0.5:
|
||||
version "6.0.5"
|
||||
resolved "https://registry.yarnpkg.com/axios-cookiejar-support/-/axios-cookiejar-support-6.0.5.tgz#c8873c54f17e7501fb7e5fcc6edd9e5f555bbc06"
|
||||
integrity sha512-ldPOQCJWB0ipugkTNVB8QRl/5L2UgfmVNVQtS9en1JQJ1wW588PqAmymnwmmgc12HLDzDtsJ28xE2ppj4rD4ng==
|
||||
dependencies:
|
||||
http-cookie-agent "^7.0.3"
|
||||
|
||||
axios-rate-limit@1.3.0:
|
||||
version "1.3.0"
|
||||
resolved "https://registry.npmjs.org/axios-rate-limit/-/axios-rate-limit-1.3.0.tgz"
|
||||
@@ -7971,6 +7978,11 @@ fs.realpath@^1.0.0:
|
||||
resolved "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz"
|
||||
integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==
|
||||
|
||||
fsevents@2.3.2:
|
||||
version "2.3.2"
|
||||
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
|
||||
integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
|
||||
|
||||
fsevents@~2.3.2:
|
||||
version "2.3.3"
|
||||
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6"
|
||||
@@ -8522,6 +8534,13 @@ http-cache-semantics@^4.2.0:
|
||||
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz#205f4db64f8562b76a4ff9235aa5279839a09dd5"
|
||||
integrity sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==
|
||||
|
||||
http-cookie-agent@^7.0.3:
|
||||
version "7.0.3"
|
||||
resolved "https://registry.yarnpkg.com/http-cookie-agent/-/http-cookie-agent-7.0.3.tgz#02d342cdfb89d61369f61595b5b5124b4315a66f"
|
||||
integrity sha512-EeZo7CGhfqPW6R006rJa4QtZZUpBygDa2HZH3DJqsTzTjyRE6foDBVQIv/pjVsxHC8z2GIdbB1Hvn9SRorP3WQ==
|
||||
dependencies:
|
||||
agent-base "^7.1.4"
|
||||
|
||||
http-errors@2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz"
|
||||
@@ -11981,6 +12000,20 @@ play-dl@^1.9.7:
|
||||
dependencies:
|
||||
play-audio "^0.5.2"
|
||||
|
||||
playwright-core@1.57.0:
|
||||
version "1.57.0"
|
||||
resolved "https://registry.yarnpkg.com/playwright-core/-/playwright-core-1.57.0.tgz#3dcc9a865af256fa9f0af0d67fc8dd54eecaebf5"
|
||||
integrity sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==
|
||||
|
||||
playwright@^1.57.0:
|
||||
version "1.57.0"
|
||||
resolved "https://registry.yarnpkg.com/playwright/-/playwright-1.57.0.tgz#74d1dacff5048dc40bf4676940b1901e18ad0f46"
|
||||
integrity sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==
|
||||
dependencies:
|
||||
playwright-core "1.57.0"
|
||||
optionalDependencies:
|
||||
fsevents "2.3.2"
|
||||
|
||||
plex-api-credentials@3.0.1:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.npmjs.org/plex-api-credentials/-/plex-api-credentials-3.0.1.tgz"
|
||||
@@ -14220,6 +14253,11 @@ tldts-core@^6.1.86:
|
||||
resolved "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.86.tgz"
|
||||
integrity sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==
|
||||
|
||||
tldts-core@^7.0.19:
|
||||
version "7.0.19"
|
||||
resolved "https://registry.yarnpkg.com/tldts-core/-/tldts-core-7.0.19.tgz#9dd8a457a09b4e65c8266c029f1847fa78dead20"
|
||||
integrity sha512-lJX2dEWx0SGH4O6p+7FPwYmJ/bu1JbcGJ8RLaG9b7liIgZ85itUVEPbMtWRVrde/0fnDPEPHW10ZsKW3kVsE9A==
|
||||
|
||||
tldts@^6.1.32:
|
||||
version "6.1.86"
|
||||
resolved "https://registry.npmjs.org/tldts/-/tldts-6.1.86.tgz"
|
||||
@@ -14227,6 +14265,13 @@ tldts@^6.1.32:
|
||||
dependencies:
|
||||
tldts-core "^6.1.86"
|
||||
|
||||
tldts@^7.0.5:
|
||||
version "7.0.19"
|
||||
resolved "https://registry.yarnpkg.com/tldts/-/tldts-7.0.19.tgz#84cd7a7f04e68ec93b93b106fac038c527b99368"
|
||||
integrity sha512-8PWx8tvC4jDB39BQw1m4x8y5MH1BcQ5xHeL2n7UVFulMPH/3Q0uiamahFJ3lXA0zO2SUyRXuVVbWSDmstlt9YA==
|
||||
dependencies:
|
||||
tldts-core "^7.0.19"
|
||||
|
||||
tmp@^0.0.33:
|
||||
version "0.0.33"
|
||||
resolved "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz"
|
||||
@@ -14300,6 +14345,13 @@ tough-cookie@^5.1.1:
|
||||
dependencies:
|
||||
tldts "^6.1.32"
|
||||
|
||||
tough-cookie@^6.0.0:
|
||||
version "6.0.0"
|
||||
resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-6.0.0.tgz#11e418b7864a2c0d874702bc8ce0f011261940e5"
|
||||
integrity sha512-kXuRi1mtaKMrsLUxz3sQYvVl37B0Ns6MzfrtV5DvJceE9bPyspOqk9xxv7XbZWcfLWbFmm997vl83qUWVJA64w==
|
||||
dependencies:
|
||||
tldts "^7.0.5"
|
||||
|
||||
tr46@^5.1.0:
|
||||
version "5.1.1"
|
||||
resolved "https://registry.npmjs.org/tr46/-/tr46-5.1.1.tgz"
|
||||
|
||||
Reference in New Issue
Block a user