update: implement proper tmdb rate limiting

Allows the fetch meta command to work without hitting rate limits on TMDB. TMDB's rate limit is ~40 requests per second, but each movie/TV fetch has to send an additional HTTP request for each collection, network, company and person, so I figured 2 jobs per second was safe.
This commit is contained in:
Roardom
2025-05-08 11:54:03 +00:00
parent e1fc97ecfe
commit 1962ef2c04
4 changed files with 78 additions and 16 deletions
+13 -10
View File
@@ -17,6 +17,9 @@ declare(strict_types=1);
namespace App\Console\Commands;
use App\Models\Torrent;
use App\Jobs\ProcessIgdbGameJob;
use App\Jobs\ProcessMovieJob;
use App\Jobs\ProcessTvJob;
use App\Services\Igdb\IgdbScraper;
use App\Services\Tmdb\TMDBScraper;
use Exception;
@@ -47,7 +50,7 @@ class FetchMeta extends Command
final public function handle(): void
{
$start = now();
$this->alert('Meta fetch queueing started. Fetching is done asynchronously in a separate job queue.');
$this->alert('Meta fetch queueing started. Fetching is done synchronously within this command. This can take awhile (~1 work per second).');
$tmdbScraper = new TMDBScraper();
$igdbScraper = new IgdbScraper();
@@ -61,11 +64,11 @@ class FetchMeta extends Command
->whereNotNull('tmdb_movie_id')
->pluck('tmdb_movie_id');
$this->info('Queueing all tmdb movie metadata fetching');
$this->info('Fetching '.$tmdbMovieIds->count().' movies');
foreach ($tmdbMovieIds as $id) {
sleep(3);
$tmdbScraper->movie($id);
usleep(250_000);
ProcessMovieJob::dispatchSync($id);
$this->info("Movie metadata fetched for tmdb {$id}");
}
@@ -78,12 +81,12 @@ class FetchMeta extends Command
->whereNotNull('tmdb_tv_id')
->pluck('tmdb_tv_id');
$this->info('Queueing all tmdb tv metadata fetching');
$this->info('Fetching '.$tmdbTvIds->count().' tv series');
foreach ($tmdbTvIds as $id) {
sleep(3);
$tmdbScraper->tv($id);
$this->info("Movie metadata fetched for tmdb {$id}");
usleep(250_000);
ProcessTvJob::dispatchSync($id);
$this->info("TV metadata fetched for tmdb {$id}");
}
$this->info('Querying all igdb game ids');
@@ -95,11 +98,11 @@ class FetchMeta extends Command
->whereNotNull('igdb')
->pluck('igdb');
$this->info('Queueing all igdb game metadata fetching');
$this->info('Fetching '.$igdbGameIds->count().' games');
foreach ($igdbGameIds as $id) {
usleep(250_000);
$igdbScraper->game($id);
ProcessIgdbGameJob::dispatchSync($id);
$this->info("Game metadata fetched for igdb {$id}");
}
+32 -3
View File
@@ -24,10 +24,13 @@ use App\Models\TmdbMovie;
use App\Models\TmdbPerson;
use App\Models\Torrent;
use App\Services\Tmdb\Client;
use DateTime;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\Middleware\RateLimited;
use Illuminate\Queue\Middleware\Skip;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Queue\SerializesModels;
@@ -52,11 +55,27 @@ class ProcessMovieJob implements ShouldQueue
*/
public function middleware(): array
{
// NOTE(review): two consecutive return statements appear here; this looks like
// a rendered diff showing the old return followed by its replacement. As
// written, only the first return is reachable — confirm which one is live.
return [new WithoutOverlapping((string) $this->id)->dontRelease()->expireAfter(30)];
return [
// Skip the job while a "tmdb-movie-scraper:{id}" cache entry exists;
// handle() writes that entry with a TTL of 8 * 3600 seconds.
Skip::when(cache()->has("tmdb-movie-scraper:{$this->id}")),
// Overlap guard keyed by this job's id (cast to string).
new WithoutOverlapping((string) $this->id)->dontRelease()->expireAfter(30),
// Apply the rate limiter registered under the name 'tmdb'
// (declared as Limit::perSecond(2) in RouteServiceProvider).
new RateLimited('tmdb'),
];
}
/**
* Determine the time at which the job should time out.
*
* The deadline is computed as one day after the moment this method is called
* (now()->addDay()).
*/
public function retryUntil(): DateTime
{
return now()->addDay();
}
public function handle(): void
{
// TMDB caches their api responses for 8 hours, so don't abuse them
cache()->put("tmdb-movie-scraper:{$this->id}", now(), 8 * 3600);
// Movie
$movieScraper = new Client\Movie($this->id);
@@ -97,8 +116,18 @@ class ProcessMovieJob implements ShouldQueue
$credits = $movieScraper->getCredits();
$people = [];
foreach (array_unique(array_column($credits, 'tmdb_person_id')) as $person_id) {
$people[] = (new Client\Person($person_id))->getPerson();
foreach (array_unique(array_column($credits, 'tmdb_person_id')) as $personId) {
// TMDB caches their api responses for 8 hours, so don't abuse them
$cacheKey = "tmdb-person-scraper:{$personId}";
if (cache()->has($cacheKey)) {
continue;
}
cache()->put($cacheKey, now(), 8 * 3600);
$people[] = (new Client\Person($personId))->getPerson();
}
TmdbPerson::upsert($people, 'id');
+32 -3
View File
@@ -24,10 +24,13 @@ use App\Models\TmdbPerson;
use App\Models\Torrent;
use App\Models\TmdbTv;
use App\Services\Tmdb\Client;
use DateTime;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\Middleware\RateLimited;
use Illuminate\Queue\Middleware\Skip;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Queue\SerializesModels;
@@ -52,11 +55,27 @@ class ProcessTvJob implements ShouldQueue
*/
public function middleware(): array
{
// NOTE(review): two consecutive return statements appear here; this looks like
// a rendered diff showing the old return followed by its replacement. As
// written, only the first return is reachable — confirm which one is live.
return [new WithoutOverlapping((string) $this->id)->dontRelease()->expireAfter(30)];
return [
// Skip the job while a "tmdb-tv-scraper:{id}" cache entry exists;
// handle() writes that entry with a TTL of 8 * 3600 seconds.
Skip::when(cache()->has("tmdb-tv-scraper:{$this->id}")),
// Overlap guard keyed by this job's id (cast to string).
new WithoutOverlapping((string) $this->id)->dontRelease()->expireAfter(30),
// Apply the rate limiter registered under the name 'tmdb'
// (declared as Limit::perSecond(2) in RouteServiceProvider).
new RateLimited('tmdb'),
];
}
/**
* Determine the time at which the job should time out.
*
* The deadline is computed as one day after the moment this method is called
* (now()->addDay()).
*/
public function retryUntil(): DateTime
{
return now()->addDay();
}
public function handle(): void
{
// TMDB caches their api responses for 8 hours, so don't abuse them
cache()->put("tmdb-tv-scraper:{$this->id}", now(), 8 * 3600);
// Tv
$tvScraper = new Client\TV($this->id);
@@ -99,8 +118,18 @@ class ProcessTvJob implements ShouldQueue
$credits = $tvScraper->getCredits();
$people = [];
foreach (array_unique(array_column($credits, 'tmdb_person_id')) as $person_id) {
$people[] = (new Client\Person($person_id))->getPerson();
foreach (array_unique(array_column($credits, 'tmdb_person_id')) as $personId) {
// TMDB caches their api responses for 8 hours, so don't abuse them
$cacheKey = "tmdb-person-scraper:{$personId}";
if (cache()->has($cacheKey)) {
continue;
}
cache()->put($cacheKey, now(), 8 * 3600);
$people[] = (new Client\Person($personId))->getPerson();
}
TmdbPerson::upsert($people, 'id');
+1
View File
@@ -86,6 +86,7 @@ class RouteServiceProvider extends ServiceProvider
RateLimiter::for('rss', fn (Request $request) => Limit::perMinute(30)->by('rss'.$request->ip()));
RateLimiter::for('authenticated-images', fn (Request $request): Limit => Limit::perMinute(200)->by('authenticated-images:'.$request->user()->id));
RateLimiter::for('search', fn (Request $request): Limit => Limit::perMinute(100)->by('search:'.$request->user()->id));
RateLimiter::for('tmdb', fn (): Limit => Limit::perSecond(2));
}
protected function removeIndexPhpFromUrl(): void