mirror of
https://github.com/twitter/the-algorithm-ml.git
synced 2025-06-13 12:58:39 +02:00
Twitter's Recommendation Algorithm - Heavy Ranker and TwHIN embeddings
This commit is contained in:
1
common/filesystem/__init__.py
Normal file
1
common/filesystem/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from tml.common.filesystem.util import infer_fs, is_gcs_fs, is_local_fs
|
16
common/filesystem/test_infer_fs.py
Normal file
16
common/filesystem/test_infer_fs.py
Normal file
@ -0,0 +1,16 @@
|
||||
"""Minimal test for infer_fs.
|
||||
|
||||
Mostly a test that it returns an object
|
||||
"""
|
||||
from tml.common.filesystem import infer_fs
|
||||
|
||||
|
||||
def test_infer_fs():
    """Check that a local path and a GCS path map to unequal filesystems."""
    fs_for_local = infer_fs("/tmp/local_path")
    fs_for_gcs = infer_fs("gs://somebucket/somepath")

    # A plain path and a gs:// path must not resolve to equal objects.
    assert fs_for_local != fs_for_gcs
|
25
common/filesystem/util.py
Normal file
25
common/filesystem/util.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""Utilities for interacting with the file systems."""
|
||||
from fsspec.implementations.local import LocalFileSystem
|
||||
import gcsfs
|
||||
|
||||
|
||||
# Shared GCS filesystem handle, built once when this module is imported;
# infer_fs() hands back this same object for every gs:// path.
# NOTE(review): cache_timeout=-1 presumably disables gcsfs's listing/metadata
# cache so callers always see fresh bucket state -- confirm against gcsfs docs.
GCS_FS = gcsfs.GCSFileSystem(cache_timeout=-1)
|
||||
# Shared local filesystem handle, built once when this module is imported;
# infer_fs() returns it for any path that is neither gs:// nor hdfs://.
LOCAL_FS = LocalFileSystem()
|
||||
|
||||
|
||||
def infer_fs(path: str):
    """Select the filesystem object matching the scheme prefix of ``path``.

    Args:
        path: Path string; only its leading scheme prefix is inspected.

    Returns:
        The module-level ``GCS_FS`` when ``path`` starts with ``gs://``;
        the module-level ``LOCAL_FS`` for anything else that is not HDFS.

    Raises:
        NotImplementedError: If ``path`` starts with ``hdfs://``.
    """
    if path.startswith("gs://"):
        return GCS_FS

    if path.startswith("hdfs://"):
        # We can probably use pyarrow HDFS to support this.
        raise NotImplementedError("HDFS not yet supported")

    # No recognised remote scheme: treat the path as local.
    return LOCAL_FS
|
||||
|
||||
|
||||
def is_local_fs(fs):
    """Return the result of comparing ``fs`` to the shared ``LOCAL_FS`` with ``==``."""
    matches_local = fs == LOCAL_FS
    return matches_local
|
||||
|
||||
|
||||
def is_gcs_fs(fs):
    """Return the result of comparing ``fs`` to the shared ``GCS_FS`` with ``==``."""
    matches_gcs = fs == GCS_FS
    return matches_gcs
|
Reference in New Issue
Block a user