diff --git a/src/hooks/useProviderScrape.tsx b/src/hooks/useProviderScrape.tsx
index 8eeceff6..d1120996 100644
--- a/src/hooks/useProviderScrape.tsx
+++ b/src/hooks/useProviderScrape.tsx
@@ -1,7 +1,11 @@
-import { ScrapeMedia } from "@movie-web/providers";
+import {
+  FullScraperEvents,
+  RunOutput,
+  ScrapeMedia,
+} from "@movie-web/providers";
 import { RefObject, useCallback, useEffect, useRef, useState } from "react";
 
-import { providers } from "@/utils/providers";
+import { getLoadbalancedProviderApiUrl, providers } from "@/utils/providers";
 
 export interface ScrapingItems {
   id: string;
@@ -18,96 +22,169 @@ export interface ScrapingSegment {
   percentage: number;
 }
 
-export function useScrape() {
+type ScraperEvent<Event extends keyof FullScraperEvents> = Parameters<
+  NonNullable<FullScraperEvents[Event]>
+>[0];
+
+function useBaseScrape() {
   const [sources, setSources] = useState<Record<string, ScrapingSegment>>({});
   const [sourceOrder, setSourceOrder] = useState<ScrapingItems[]>([]);
   const [currentSource, setCurrentSource] = useState<string>();
+  const lastId = useRef<string | null>(null);
+
+  const initEvent = useCallback((evt: ScraperEvent<"init">) => {
+    setSources(
+      evt.sourceIds
+        .map((v) => {
+          const source = providers.getMetadata(v);
+          if (!source) throw new Error("invalid source id");
+          const out: ScrapingSegment = {
+            name: source.name,
+            id: source.id,
+            status: "waiting",
+            percentage: 0,
+          };
+          return out;
+        })
+        .reduce<Record<string, ScrapingSegment>>((a, v) => {
+          a[v.id] = v;
+          return a;
+        }, {})
+    );
+    setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] })));
+  }, []);
+
+  const startEvent = useCallback((id: ScraperEvent<"start">) => {
+    setSources((s) => {
+      if (s[id]) s[id].status = "pending";
+      return { ...s };
+    });
+    setCurrentSource(id);
+    lastId.current = id;
+  }, []);
+
+  const updateEvent = useCallback((evt: ScraperEvent<"update">) => {
+    setSources((s) => {
+      if (s[evt.id]) {
+        s[evt.id].status = evt.status;
+        s[evt.id].reason = evt.reason;
+        s[evt.id].error = evt.error;
+        s[evt.id].percentage = evt.percentage;
+      }
+      return { ...s };
+    });
+  }, []);
+
+  const discoverEmbedsEvent = useCallback(
+    (evt: ScraperEvent<"discoverEmbeds">) => {
+      setSources((s) => {
+        evt.embeds.forEach((v) => {
+          const source = providers.getMetadata(v.embedScraperId);
+          if (!source) throw new Error("invalid source id");
+          const out: ScrapingSegment = {
+            embedId: v.embedScraperId,
+            name: source.name,
+            id: v.id,
+            status: "waiting",
+            percentage: 0,
+          };
+          s[v.id] = out;
+        });
+        return { ...s };
+      });
+      setSourceOrder((s) => {
+        const source = s.find((v) => v.id === evt.sourceId);
+        if (!source) throw new Error("invalid source id");
+        source.children = evt.embeds.map((v) => v.id);
+        return [...s];
+      });
+    },
+    []
+  );
+
+  const startScrape = useCallback(() => {
+    lastId.current = null;
+  }, []);
+
+  const getResult = useCallback((output: RunOutput | null) => {
+    if (output && lastId.current) {
+      setSources((s) => {
+        if (!lastId.current) return s;
+        if (s[lastId.current]) s[lastId.current].status = "success";
+        return { ...s };
+      });
+    }
+    return output;
+  }, []);
+
+  return {
+    initEvent,
+    startEvent,
+    updateEvent,
+    discoverEmbedsEvent,
+    startScrape,
+    getResult,
+    sources,
+    sourceOrder,
+    currentSource,
+  };
+}
+
+export function useScrape() {
+  const {
+    sources,
+    sourceOrder,
+    currentSource,
+    updateEvent,
+    discoverEmbedsEvent,
+    initEvent,
+    getResult,
+    startEvent,
+    startScrape,
+  } = useBaseScrape();
 
   const startScraping = useCallback(
     async (media: ScrapeMedia) => {
-      if (!providers) return null;
+      const providerApiUrl = getLoadbalancedProviderApiUrl();
+      if (providerApiUrl) {
+        startScrape();
+        const sseOutput = await new Promise<RunOutput | null>(
+          (resolve, reject) => {
+            const scrapeEvents = new EventSource(providerApiUrl);
+            scrapeEvents.addEventListener("error", (err) => reject(err));
+            scrapeEvents.addEventListener("init", (e) => initEvent(JSON.parse(e.data)));
+            scrapeEvents.addEventListener("start", (e) => startEvent(JSON.parse(e.data)));
+            scrapeEvents.addEventListener("update", (e) => updateEvent(JSON.parse(e.data)));
+            scrapeEvents.addEventListener("discoverEmbeds", (e) =>
+              discoverEmbedsEvent(JSON.parse(e.data))
+            );
+            scrapeEvents.addEventListener("finish", (e) => resolve(JSON.parse(e.data)));
+          }
+        );
+        return getResult(sseOutput);
+      }
 
-      let lastId: string | null = null;
+      if (!providers) return null;
+      startScrape();
       const output = await providers.runAll({
         media,
         events: {
-          init(evt) {
-            setSources(
-              evt.sourceIds
-                .map((v) => {
-                  const source = providers.getMetadata(v);
-                  if (!source) throw new Error("invalid source id");
-                  const out: ScrapingSegment = {
-                    name: source.name,
-                    id: source.id,
-                    status: "waiting",
-                    percentage: 0,
-                  };
-                  return out;
-                })
-                .reduce<Record<string, ScrapingSegment>>((a, v) => {
-                  a[v.id] = v;
-                  return a;
-                }, {})
-            );
-            setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] })));
-          },
-          start(id) {
-            setSources((s) => {
-              if (s[id]) s[id].status = "pending";
-              return { ...s };
-            });
-            setCurrentSource(id);
-            lastId = id;
-          },
-          update(evt) {
-            setSources((s) => {
-              if (s[evt.id]) {
-                s[evt.id].status = evt.status;
-                s[evt.id].reason = evt.reason;
-                s[evt.id].error = evt.error;
-                s[evt.id].percentage = evt.percentage;
-              }
-              return { ...s };
-            });
-          },
-          discoverEmbeds(evt) {
-            setSources((s) => {
-              evt.embeds.forEach((v) => {
-                const source = providers.getMetadata(v.embedScraperId);
-                if (!source) throw new Error("invalid source id");
-                const out: ScrapingSegment = {
-                  embedId: v.embedScraperId,
-                  name: source.name,
-                  id: v.id,
-                  status: "waiting",
-                  percentage: 0,
-                };
-                s[v.id] = out;
-              });
-              return { ...s };
-            });
-            setSourceOrder((s) => {
-              const source = s.find((v) => v.id === evt.sourceId);
-              if (!source) throw new Error("invalid source id");
-              source.children = evt.embeds.map((v) => v.id);
-              return [...s];
-            });
-          },
+          init: initEvent,
+          start: startEvent,
+          update: updateEvent,
+          discoverEmbeds: discoverEmbedsEvent,
         },
       });
-
-      if (output && lastId) {
-        setSources((s) => {
-          if (!lastId) return s;
-          if (s[lastId]) s[lastId].status = "success";
-          return { ...s };
-        });
-      }
-
-      return output;
+      return getResult(output);
     },
-    [setSourceOrder, setSources]
+    [
+      initEvent,
+      startEvent,
+      updateEvent,
+      discoverEmbedsEvent,
+      getResult,
+      startScrape,
+    ]
   );
 
   return {
diff --git a/src/utils/providers.ts b/src/utils/providers.ts
index 07b91bff..35ebe80d 100644
--- a/src/utils/providers.ts
+++ b/src/utils/providers.ts
@@ -7,22 +7,25 @@ import {
   targets,
 } from "@movie-web/providers";
 
-import { conf } from "@/setup/config";
-import { useAuthStore } from "@/stores/auth";
+import { getProviderApiUrls, getProxyUrls } from "@/utils/proxyUrls";
 
-const originalUrls = conf().PROXY_URLS;
-let fetchersIndex = -1;
-
-export function getLoadbalancedProxyUrl() {
-  const fetchers = useAuthStore.getState().proxySet ?? originalUrls;
-  if (fetchersIndex === -1 || fetchersIndex >= fetchers.length) {
-    fetchersIndex = Math.floor(Math.random() * fetchers.length);
-  }
-  const proxyUrl = fetchers[fetchersIndex];
-  fetchersIndex = (fetchersIndex + 1) % fetchers.length;
-  return proxyUrl;
+function makeLoadbalancedList(getter: () => string[]) {
+  let listIndex = -1;
+  return () => {
+    const fetchers = getter();
+    if (listIndex === -1 || listIndex >= fetchers.length) {
+      listIndex = Math.floor(Math.random() * fetchers.length);
+    }
+    const proxyUrl = fetchers[listIndex];
+    listIndex = (listIndex + 1) % fetchers.length;
+    return proxyUrl;
+  };
 }
 
+const getLoadbalancedProxyUrl = makeLoadbalancedList(getProxyUrls);
+export const getLoadbalancedProviderApiUrl =
+  makeLoadbalancedList(getProviderApiUrls);
+
 function makeLoadBalancedSimpleProxyFetcher() {
   const fetcher: ProviderBuilderOptions["fetcher"] = (a, b) => {
     const currentFetcher = makeSimpleProxyFetcher(
diff --git a/src/utils/proxyUrls.ts b/src/utils/proxyUrls.ts
new file mode 100644
index 00000000..64c91b34
--- /dev/null
+++ b/src/utils/proxyUrls.ts
@@ -0,0 +1,77 @@
+import { conf } from "@/setup/config";
+import { useAuthStore } from "@/stores/auth";
+
+const originalUrls = conf().PROXY_URLS;
+const types = ["proxy", "api"] as const;
+
+type ParsedUrlType = (typeof types)[number];
+
+export interface ParsedUrl {
+  url: string;
+  type: ParsedUrlType;
+}
+
+function canParseUrl(url: string): boolean {
+  try {
+    return !!new URL(url);
+  } catch {
+    return false;
+  }
+}
+
+function isParsedUrlType(type: string): type is ParsedUrlType {
+  return types.includes(type as any);
+}
+
+/**
+ * Turn a string like "a=b,c=d,d=e" into a dictionary object
+ */
+function parseParams(input: string): Record<string, string> {
+  const entriesParams = input
+    .split(",")
+    .map((param) => param.split("=", 2).filter((part) => part.length !== 0))
+    .filter((v) => v.length === 2);
+  return Object.fromEntries(entriesParams);
+}
+
+export function getParsedUrls() {
+  const urls = useAuthStore.getState().proxySet ?? originalUrls;
+  const output: ParsedUrl[] = [];
+  urls.forEach((url) => {
+    if (!url.startsWith("|")) {
+      if (canParseUrl(url)) {
+        output.push({
+          url,
+          type: "proxy",
+        });
+        return;
+      }
+    }
+
+    const match = url.match(/^\|([^|]+)\|(.*)$/);
+    if (!match || !match[2]) return;
+    if (!canParseUrl(match[2])) return;
+    const params = parseParams(match[1]);
+    const type = params.type ?? "proxy";
+
+    if (!isParsedUrlType(type)) return;
+    output.push({
+      url: match[2],
+      type,
+    });
+  });
+
+  return output;
+}
+
+export function getProxyUrls() {
+  return getParsedUrls()
+    .filter((v) => v.type === "proxy")
+    .map((v) => v.url);
+}
+
+export function getProviderApiUrls() {
+  return getParsedUrls()
+    .filter((v) => v.type === "api")
+    .map((v) => v.url);
+}
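
A minimal usage sketch of the new `|key=value|url` entry syntax introduced by this patch (not part of the diff; the hosts below are hypothetical examples):

```ts
import { getLoadbalancedProviderApiUrl } from "@/utils/providers";
import { getProviderApiUrls, getProxyUrls } from "@/utils/proxyUrls";

// Assuming PROXY_URLS = ["https://proxy.example.com", "|type=api|https://api.example.com"]:
getProxyUrls(); // ["https://proxy.example.com"] (plain entries default to type "proxy")
getProviderApiUrls(); // ["https://api.example.com"] (only entries tagged type=api)

// Round-robins over the api list from a random starting index; with no
// api entries configured it yields undefined at runtime, so useScrape()
// falls back to in-browser scraping via providers.runAll.
const providerApiUrl = getLoadbalancedProviderApiUrl();
```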