Twitter's Recommendation Algorithm - Heavy Ranker and TwHIN embeddings

This commit is contained in:
twitter-team
2023-03-31 13:05:14 -05:00
commit 78c3235eee
111 changed files with 11876 additions and 0 deletions

View File

@@ -0,0 +1 @@
from tml.common.filesystem.util import infer_fs, is_gcs_fs, is_local_fs

View File

@@ -0,0 +1,16 @@
"""Minimal test for infer_fs.
Mostly a test that it returns an object
"""
from tml.common.filesystem import infer_fs
def test_infer_fs():
    """Check that a local path and a GCS path resolve to different filesystems.

    This is a smoke test: it only verifies that ``infer_fs`` returns an object
    for each kind of path and that the two objects do not compare equal.
    """
    local_path = "/tmp/local_path"
    gcs_path = "gs://somebucket/somepath"

    local_fs = infer_fs(local_path)
    gcs_fs = infer_fs(gcs_path)

    # This should return two different objects
    assert local_fs != gcs_fs

25
common/filesystem/util.py Normal file
View File

@@ -0,0 +1,25 @@
"""Utilities for interacting with the file systems."""
from fsspec.implementations.local import LocalFileSystem
import gcsfs
# Module-level filesystem instances, created once at import time and returned by infer_fs.
# NOTE(review): cache_timeout=-1 presumably turns off gcsfs's metadata/listing cache —
# confirm against the documentation of the installed gcsfs version.
GCS_FS = gcsfs.GCSFileSystem(cache_timeout=-1)
# Filesystem that infer_fs returns for any path that is neither gs:// nor hdfs://.
LOCAL_FS = LocalFileSystem()
def infer_fs(path: str):
    """Pick the filesystem object to use for ``path`` based on its prefix.

    Only the leading scheme of the string is inspected; the path is not
    opened or otherwise validated.

    Args:
      path: Path or URI. A ``gs://`` prefix selects the module-level
        ``GCS_FS``; an ``hdfs://`` prefix is rejected; anything else selects
        the module-level ``LOCAL_FS``.

    Returns:
      The module-level ``GCS_FS`` or ``LOCAL_FS`` instance.

    Raises:
      NotImplementedError: If ``path`` starts with ``hdfs://``.
    """
    if path.startswith("gs://"):
        return GCS_FS
    if path.startswith("hdfs://"):
        # We can probably use pyarrow HDFS to support this.
        raise NotImplementedError("HDFS not yet supported")
    # No recognised remote scheme: treat the path as local.
    return LOCAL_FS
def is_local_fs(fs):
    """Report whether ``fs`` is the local filesystem.

    Args:
      fs: Filesystem object to check, e.g. a value returned by ``infer_fs``.

    Returns:
      The result of comparing ``fs`` with the module-level ``LOCAL_FS``.
    """
    # Uses ``==`` rather than ``is``, so the outcome follows the filesystem
    # class's own equality rules.
    return fs == LOCAL_FS
def is_gcs_fs(fs):
    """Report whether ``fs`` is the Google Cloud Storage filesystem.

    Args:
      fs: Filesystem object to check, e.g. a value returned by ``infer_fs``.

    Returns:
      The result of comparing ``fs`` with the module-level ``GCS_FS``.
    """
    # Uses ``==`` rather than ``is``, so the outcome follows the filesystem
    # class's own equality rules.
    return fs == GCS_FS