the-algorithm/src/scala/com/twitter/interaction_graph/bqe/training/candidates.sql
twitter-team ef4c5eb65e Twitter Recommendation Algorithm
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
2023-03-31 17:36:31 -05:00

19 lines
795 B
SQL

-- Partition date handled by this run: the calendar date of the templated
-- $start_time$ value, which is interpreted as epoch milliseconds.
-- No time zone is passed to DATE(), so BigQuery's default zone applies.
DECLARE date_candidates DATE DEFAULT DATE(TIMESTAMP_MILLIS($start_time$));
-- First-run bootstrap: create the output table with the same columns as
-- candidates_for_training. This is a no-op when the table already exists.
-- LIMIT 0 copies the schema only. Seeding the table with real rows here would
-- leave rows in the output that were never subject to the ds / hash-bucket
-- sampling filter applied by the INSERT later in this script.
CREATE TABLE IF NOT EXISTS `twttr-recos-ml-prod.realgraph.candidates_sampled` AS
SELECT *
FROM `twttr-recos-ml-prod.realgraph.candidates_for_training`
LIMIT 0;
-- Idempotency guard: clear any rows already written for this run's date so a
-- re-run replaces the snapshot instead of appending duplicates.
DELETE FROM `twttr-recos-ml-prod.realgraph.candidates_sampled`
  WHERE ds = date_candidates;
-- Append a deterministic sample of this run's candidates. Rows are copied
-- as-is from candidates_for_training, so no features are recomputed.
-- Each (source_id, destination_id) pair is hashed with FARM_FINGERPRINT into
-- one of 100 buckets (0-99); only bucket $mod_remainder$ is kept.
-- MOD is applied before ABS on purpose: ABS() on the raw fingerprint raises an
-- overflow error when the hash equals the minimum INT64, whereas MOD(x, 100)
-- always lies in [-99, 99]. Every other hash value maps to the same bucket as
-- MOD(ABS(x), 100) would give.
INSERT INTO `twttr-recos-ml-prod.realgraph.candidates_sampled`
SELECT *
FROM `twttr-recos-ml-prod.realgraph.candidates_for_training`
WHERE ds = date_candidates
  AND ABS(MOD(FARM_FINGERPRINT(CONCAT(source_id, '_', destination_id)), 100)) = $mod_remainder$;