mirror of
https://github.com/movie-web/movie-web.git
synced 2024-11-11 02:05:09 +01:00
Parse provider API urls + use new provider api in runAll scrape
This commit is contained in:
parent
de30929dd6
commit
a52fac701a
@ -1,7 +1,11 @@
|
||||
import { ScrapeMedia } from "@movie-web/providers";
|
||||
import {
|
||||
FullScraperEvents,
|
||||
RunOutput,
|
||||
ScrapeMedia,
|
||||
} from "@movie-web/providers";
|
||||
import { RefObject, useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import { providers } from "@/utils/providers";
|
||||
import { getLoadbalancedProviderApiUrl, providers } from "@/utils/providers";
|
||||
|
||||
export interface ScrapingItems {
|
||||
id: string;
|
||||
@ -18,96 +22,169 @@ export interface ScrapingSegment {
|
||||
percentage: number;
|
||||
}
|
||||
|
||||
export function useScrape() {
|
||||
/**
 * Payload of a single scraper event: the first parameter of the matching
 * (non-nullable) callback declared on `FullScraperEvents`.
 */
type ScraperEvent<TName extends keyof FullScraperEvents> = Parameters<
  NonNullable<FullScraperEvents[TName]>
>[0];
|
||||
|
||||
/**
 * Holds the scraping progress state (segments by id, display order and the
 * currently running source) and exposes one handler per scraper event so the
 * same bookkeeping can be driven by any event producer.
 *
 * All state updaters are pure: they build new objects instead of mutating
 * the previous state, so running an updater more than once (as React may do)
 * yields the same result.
 *
 * @returns the event handlers (`initEvent`, `startEvent`, `updateEvent`,
 *   `discoverEmbedsEvent`), the lifecycle helpers (`startScrape`,
 *   `getResult`) and the current state (`sources`, `sourceOrder`,
 *   `currentSource`).
 */
function useBaseScrape() {
  const [sources, setSources] = useState<Record<string, ScrapingSegment>>({});
  const [sourceOrder, setSourceOrder] = useState<ScrapingItems[]>([]);
  const [currentSource, setCurrentSource] = useState<string>();
  // Id passed to the most recent `start` event; reset by `startScrape`.
  const lastId = useRef<string | null>(null);

  /**
   * Replaces all segments with one "waiting" segment per announced source.
   * Throws "invalid source id" when a source has no metadata.
   */
  const initEvent = useCallback((evt: ScraperEvent<"init">) => {
    setSources(
      evt.sourceIds
        .map((v) => {
          const source = providers.getMetadata(v);
          if (!source) throw new Error("invalid source id");
          const out: ScrapingSegment = {
            name: source.name,
            id: source.id,
            status: "waiting",
            percentage: 0,
          };
          return out;
        })
        .reduce<Record<string, ScrapingSegment>>((a, v) => {
          a[v.id] = v;
          return a;
        }, {})
    );
    setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] })));
  }, []);

  /** Marks the given segment as "pending" and records it as the current one. */
  const startEvent = useCallback((id: ScraperEvent<"start">) => {
    setSources((s) => {
      const segment = s[id];
      if (!segment) return { ...s };
      const updated: ScrapingSegment = { ...segment, status: "pending" };
      return { ...s, [id]: updated };
    });
    setCurrentSource(id);
    lastId.current = id;
  }, []);

  /** Copies status, reason, error and percentage onto a known segment. */
  const updateEvent = useCallback((evt: ScraperEvent<"update">) => {
    setSources((s) => {
      const segment = s[evt.id];
      if (!segment) return { ...s };
      const updated: ScrapingSegment = {
        ...segment,
        status: evt.status,
        reason: evt.reason,
        error: evt.error,
        percentage: evt.percentage,
      };
      return { ...s, [evt.id]: updated };
    });
  }, []);

  /**
   * Registers a "waiting" segment for every discovered embed and attaches
   * the embed ids as children of the source that discovered them.
   * Throws "invalid source id" for an embed without metadata (synchronously,
   * before any state is touched) or when the parent source is not in the
   * current order list.
   */
  const discoverEmbedsEvent = useCallback(
    (evt: ScraperEvent<"discoverEmbeds">) => {
      // Resolve metadata before scheduling updates so the updater stays pure
      // and a bad embed id fails here rather than in the middle of a render.
      const discovered = evt.embeds.map((v) => {
        const source = providers.getMetadata(v.embedScraperId);
        if (!source) throw new Error("invalid source id");
        const out: ScrapingSegment = {
          embedId: v.embedScraperId,
          name: source.name,
          id: v.id,
          status: "waiting",
          percentage: 0,
        };
        return out;
      });
      const childIds = evt.embeds.map((v) => v.id);

      setSources((s) => {
        const next = { ...s };
        discovered.forEach((segment) => {
          next[segment.id] = segment;
        });
        return next;
      });
      setSourceOrder((s) => {
        const index = s.findIndex((v) => v.id === evt.sourceId);
        if (index === -1) throw new Error("invalid source id");
        const next = [...s];
        next[index] = { ...s[index], children: childIds };
        return next;
      });
    },
    []
  );

  /** Clears the last-started id; call before a new scrape run begins. */
  const startScrape = useCallback(() => {
    lastId.current = null;
  }, []);

  /**
   * Marks the last started segment as "success" when the run produced an
   * output, then hands the output back unchanged.
   */
  const getResult = useCallback((output: RunOutput | null) => {
    // Capture the ref now: the updater may run later, after the ref has
    // already been reset for another run.
    const finishedId = lastId.current;
    if (output && finishedId) {
      setSources((s) => {
        const segment = s[finishedId];
        if (!segment) return { ...s };
        const updated: ScrapingSegment = { ...segment, status: "success" };
        return { ...s, [finishedId]: updated };
      });
    }
    return output;
  }, []);

  return {
    initEvent,
    startEvent,
    updateEvent,
    discoverEmbedsEvent,
    startScrape,
    getResult,
    sources,
    sourceOrder,
    currentSource,
  };
}
|
||||
|
||||
export function useScrape() {
|
||||
const {
|
||||
sources,
|
||||
sourceOrder,
|
||||
currentSource,
|
||||
updateEvent,
|
||||
discoverEmbedsEvent,
|
||||
initEvent,
|
||||
getResult,
|
||||
startEvent,
|
||||
startScrape,
|
||||
} = useBaseScrape();
|
||||
|
||||
const startScraping = useCallback(
|
||||
async (media: ScrapeMedia) => {
|
||||
if (!providers) return null;
|
||||
const providerApiUrl = getLoadbalancedProviderApiUrl();
|
||||
if (providerApiUrl) {
|
||||
startScrape();
|
||||
const sseOutput = await new Promise<RunOutput | null>(
|
||||
(resolve, reject) => {
|
||||
const scrapeEvents = new EventSource(providerApiUrl);
|
||||
scrapeEvents.addEventListener("error", (err) => reject(err));
|
||||
scrapeEvents.addEventListener("init", (e) => initEvent(e.data));
|
||||
scrapeEvents.addEventListener("start", (e) => startEvent(e.data));
|
||||
scrapeEvents.addEventListener("update", (e) => updateEvent(e.data));
|
||||
scrapeEvents.addEventListener("discoverEmbeds", (e) =>
|
||||
discoverEmbedsEvent(e.data)
|
||||
);
|
||||
scrapeEvents.addEventListener("finish", (e) => resolve(e.data));
|
||||
}
|
||||
);
|
||||
return getResult(sseOutput);
|
||||
}
|
||||
|
||||
let lastId: string | null = null;
|
||||
if (!providers) return null;
|
||||
startScrape();
|
||||
const output = await providers.runAll({
|
||||
media,
|
||||
events: {
|
||||
init(evt) {
|
||||
setSources(
|
||||
evt.sourceIds
|
||||
.map((v) => {
|
||||
const source = providers.getMetadata(v);
|
||||
if (!source) throw new Error("invalid source id");
|
||||
const out: ScrapingSegment = {
|
||||
name: source.name,
|
||||
id: source.id,
|
||||
status: "waiting",
|
||||
percentage: 0,
|
||||
};
|
||||
return out;
|
||||
})
|
||||
.reduce<Record<string, ScrapingSegment>>((a, v) => {
|
||||
a[v.id] = v;
|
||||
return a;
|
||||
}, {})
|
||||
);
|
||||
setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] })));
|
||||
},
|
||||
start(id) {
|
||||
setSources((s) => {
|
||||
if (s[id]) s[id].status = "pending";
|
||||
return { ...s };
|
||||
});
|
||||
setCurrentSource(id);
|
||||
lastId = id;
|
||||
},
|
||||
update(evt) {
|
||||
setSources((s) => {
|
||||
if (s[evt.id]) {
|
||||
s[evt.id].status = evt.status;
|
||||
s[evt.id].reason = evt.reason;
|
||||
s[evt.id].error = evt.error;
|
||||
s[evt.id].percentage = evt.percentage;
|
||||
}
|
||||
return { ...s };
|
||||
});
|
||||
},
|
||||
discoverEmbeds(evt) {
|
||||
setSources((s) => {
|
||||
evt.embeds.forEach((v) => {
|
||||
const source = providers.getMetadata(v.embedScraperId);
|
||||
if (!source) throw new Error("invalid source id");
|
||||
const out: ScrapingSegment = {
|
||||
embedId: v.embedScraperId,
|
||||
name: source.name,
|
||||
id: v.id,
|
||||
status: "waiting",
|
||||
percentage: 0,
|
||||
};
|
||||
s[v.id] = out;
|
||||
});
|
||||
return { ...s };
|
||||
});
|
||||
setSourceOrder((s) => {
|
||||
const source = s.find((v) => v.id === evt.sourceId);
|
||||
if (!source) throw new Error("invalid source id");
|
||||
source.children = evt.embeds.map((v) => v.id);
|
||||
return [...s];
|
||||
});
|
||||
},
|
||||
init: initEvent,
|
||||
start: startEvent,
|
||||
update: updateEvent,
|
||||
discoverEmbeds: discoverEmbedsEvent,
|
||||
},
|
||||
});
|
||||
|
||||
if (output && lastId) {
|
||||
setSources((s) => {
|
||||
if (!lastId) return s;
|
||||
if (s[lastId]) s[lastId].status = "success";
|
||||
return { ...s };
|
||||
});
|
||||
}
|
||||
|
||||
return output;
|
||||
return getResult(output);
|
||||
},
|
||||
[setSourceOrder, setSources]
|
||||
[
|
||||
initEvent,
|
||||
startEvent,
|
||||
updateEvent,
|
||||
discoverEmbedsEvent,
|
||||
getResult,
|
||||
startScrape,
|
||||
]
|
||||
);
|
||||
|
||||
return {
|
||||
|
@ -7,22 +7,25 @@ import {
|
||||
targets,
|
||||
} from "@movie-web/providers";
|
||||
|
||||
import { conf } from "@/setup/config";
|
||||
import { useAuthStore } from "@/stores/auth";
|
||||
import { getProviderApiUrls, getProxyUrls } from "@/utils/proxyUrls";
|
||||
|
||||
const originalUrls = conf().PROXY_URLS;
|
||||
let fetchersIndex = -1;
|
||||
|
||||
export function getLoadbalancedProxyUrl() {
|
||||
const fetchers = useAuthStore.getState().proxySet ?? originalUrls;
|
||||
if (fetchersIndex === -1 || fetchersIndex >= fetchers.length) {
|
||||
fetchersIndex = Math.floor(Math.random() * fetchers.length);
|
||||
}
|
||||
const proxyUrl = fetchers[fetchersIndex];
|
||||
fetchersIndex = (fetchersIndex + 1) % fetchers.length;
|
||||
return proxyUrl;
|
||||
/**
 * Builds a picker that cycles through the list returned by `getter`.
 *
 * The list is re-read on every call. The first pick (and any pick after the
 * stored position has fallen outside the list) starts at a random position;
 * subsequent picks advance by one and wrap around.
 *
 * @param getter returns the current list of URLs to balance over
 * @returns a function yielding the next URL, or `undefined` at runtime when
 *   the list is currently empty
 */
function makeLoadbalancedList(getter: () => string[]) {
  let listIndex = -1;
  return () => {
    const fetchers = getter();
    const count = fetchers.length;
    if (listIndex === -1 || listIndex >= count) {
      listIndex = Math.floor(Math.random() * count);
    }
    const proxyUrl = fetchers[listIndex];
    // `% 0` is NaN, which would slip past both reset checks above and leave
    // the picker stuck on `undefined`; fall back to the unset marker instead.
    listIndex = count > 0 ? (listIndex + 1) % count : -1;
    return proxyUrl;
  };
}
|
||||
|
||||
/** Picks the next proxy URL from the list produced by `getProxyUrls`. */
const getLoadbalancedProxyUrl = makeLoadbalancedList(getProxyUrls);

/** Picks the next provider API URL from the list produced by `getProviderApiUrls`. */
export const getLoadbalancedProviderApiUrl = makeLoadbalancedList(
  getProviderApiUrls
);
|
||||
|
||||
function makeLoadBalancedSimpleProxyFetcher() {
|
||||
const fetcher: ProviderBuilderOptions["fetcher"] = (a, b) => {
|
||||
const currentFetcher = makeSimpleProxyFetcher(
|
||||
|
77
src/utils/proxyUrls.ts
Normal file
77
src/utils/proxyUrls.ts
Normal file
@ -0,0 +1,77 @@
|
||||
import { conf } from "@/setup/config";
|
||||
import { useAuthStore } from "@/stores/auth";
|
||||
|
||||
/** URL entries from the app config; used when the auth store has no `proxySet`. */
const originalUrls = conf().PROXY_URLS;

/** Every entry type a configured URL may be tagged with. */
const types = ["proxy", "api"] as const;

/** Union of the literal values listed in `types`. */
type ParsedUrlType = (typeof types)[number];

/** A configured URL together with the type it was tagged with. */
export interface ParsedUrl {
  /** The URL itself, without any leading parameter section. */
  url: string;
  /** Which kind of endpoint the URL points at. */
  type: ParsedUrlType;
}
|
||||
|
||||
/**
 * Reports whether `url` is accepted by the `URL` constructor.
 *
 * @param url candidate string
 * @returns `true` when construction succeeds, `false` when it throws
 */
function canParseUrl(url: string): boolean {
  let parsed: URL | undefined;
  try {
    parsed = new URL(url);
  } catch {
    parsed = undefined;
  }
  return parsed !== undefined;
}
|
||||
|
||||
/**
 * Type guard: narrows an arbitrary string to `ParsedUrlType` when it is one
 * of the values listed in `types`.
 *
 * @param type candidate type name
 * @returns `true` when `type` is a known entry type
 */
function isParsedUrlType(type: string): type is ParsedUrlType {
  // Widen the tuple to plain strings instead of casting the argument to
  // `any`, so the membership check stays fully type-checked.
  const knownTypes: readonly string[] = types;
  return knownTypes.includes(type);
}
|
||||
|
||||
/**
 * Parse a comma-separated "key=value" list (e.g. "a=b,c=d,d=e") into an
 * object. Segments that do not yield exactly one non-empty key and one
 * non-empty value are dropped; anything after a second "=" is ignored.
 */
function parseParams(input: string): Record<string, string> {
  const pairs: string[][] = [];
  for (const segment of input.split(",")) {
    const pieces = segment.split("=", 2).filter((piece) => piece.length !== 0);
    if (pieces.length === 2) pairs.push(pieces);
  }
  return Object.fromEntries(pairs);
}
|
||||
|
||||
/**
 * Reads the configured URL entries (the auth store's `proxySet`, falling
 * back to the app config) and classifies each one.
 *
 * Two entry formats are recognised:
 * - a plain URL, which is treated as type "proxy";
 * - `|key=value,...|url`, where the `type` parameter selects the entry type
 *   (defaulting to "proxy" when absent).
 *
 * Entries whose URL cannot be parsed, or whose type is unknown, are skipped.
 *
 * @returns the recognised entries in their configured order
 */
export function getParsedUrls(): ParsedUrl[] {
  const urls = useAuthStore.getState().proxySet ?? originalUrls;
  const output: ParsedUrl[] = [];
  urls.forEach((url) => {
    if (!url.startsWith("|")) {
      // Untagged entry: either a usable plain URL or nothing at all.
      if (canParseUrl(url)) {
        output.push({
          url,
          type: "proxy",
        });
      }
      return;
    }

    // The pipes are literal delimiters and must be escaped. `exec` without
    // the `g` flag is used so the capture groups are available
    // (group 1 = parameter list, group 2 = URL).
    const match = /^\|([^|]+)\|(.*)$/.exec(url);
    if (!match || !match[2]) return;
    if (!canParseUrl(match[2])) return;
    const params = parseParams(match[1]);
    const type = params.type ?? "proxy";

    if (!isParsedUrlType(type)) return;
    output.push({
      url: match[2],
      type,
    });
  });

  return output;
}
|
||||
|
||||
/**
 * Lists the URLs of all configured entries of type "proxy".
 *
 * @returns the proxy URLs in the order `getParsedUrls` reports them
 */
export function getProxyUrls() {
  const proxyUrls: string[] = [];
  for (const entry of getParsedUrls()) {
    if (entry.type === "proxy") proxyUrls.push(entry.url);
  }
  return proxyUrls;
}
|
||||
|
||||
/**
 * Lists the URLs of all configured entries of type "api".
 *
 * @returns the provider API URLs in the order `getParsedUrls` reports them
 */
export function getProviderApiUrls() {
  const apiUrls: string[] = [];
  for (const entry of getParsedUrls()) {
    if (entry.type === "api") apiUrls.push(entry.url);
  }
  return apiUrls;
}
|
Loading…
Reference in New Issue
Block a user