mirror of https://github.com/twitter/the-algorithm.git
synced 2025-01-03 08:01:53 +01:00
Merge 858525b9d8 into 72eda9a24f
This commit is contained in:
commit d03bf89cbf
@@ -6,8 +6,8 @@ import sys
from urllib.parse import urlsplit

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
import faiss
from apache_beam.options.pipeline_options import PipelineOptions


def parse_d6w_config(argv=None):
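Most hunks in this commit reorder imports into stdlib / third-party / local groups. A sketch of reproducing that normalization programmatically, assuming the standard isort tool (the commit itself does not say which formatter was used):

import isort

# isort.code() returns the source string with its imports grouped and sorted.
source = (
    "import faiss\n"
    "import apache_beam as beam\n"
    "from apache_beam.options.pipeline_options import PipelineOptions\n"
)
print(isort.code(source))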
@@ -160,8 +160,8 @@ class MergeAndBuildIndex(beam.CombineFn):
    import subprocess

    import faiss
    from google.cloud import storage
    import numpy as np
    from google.cloud import storage

    client = storage.Client()
    bucket = client.get_bucket(self.bucket_name)
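For context on what MergeAndBuildIndex's imports are for, a minimal self-contained sketch of the underlying pattern: build a FAISS index, serialize it, and stage it in a GCS bucket. The bucket and blob names below are hypothetical, not the pipeline's.

import faiss
import numpy as np
from google.cloud import storage

dim = 64
index = faiss.IndexFlatL2(dim)                        # exact L2 index
index.add(np.random.rand(1000, dim).astype('float32'))
faiss.write_index(index, '/tmp/merged.index')         # serialize to disk

client = storage.Client()
bucket = client.get_bucket('my-ann-bucket')           # hypothetical bucket
bucket.blob('indexes/merged.index').upload_from_filename('/tmp/merged.index')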
@@ -1,5 +1,6 @@
# checkstyle: noqa
import tensorflow.compat.v1 as tf

from .constants import INDEX_BY_LABEL, LABEL_NAMES

# TODO: Read these from command line arguments, since they specify the existing example weights in the input data.
@@ -1,7 +1,9 @@
# checkstyle: noqa
import tensorflow.compat.v1 as tf

from ..constants import EB_SCORE_IDX


# The rationale behind this logic is available at TQ-9678.
def get_lolly_logits(labels):
  '''
@@ -4,7 +4,6 @@ from .parsers import DBv2DataExampleParser
from .reader import LollyModelReader
from .scorer import LollyModelScorer


if __name__ == "__main__":
  lolly_model_reader = LollyModelReader(lolly_model_file_path=sys.argv[1])
  lolly_model_scorer = LollyModelScorer(data_example_parser=DBv2DataExampleParser(lolly_model_reader))
@@ -1,10 +1,13 @@
# checkstyle: noqa
import tensorflow.compat.v1 as tf
from collections import OrderedDict

import tensorflow.compat.v1 as tf

import twml

from .constants import EB_SCORE_IDX
from .lolly.data_helpers import get_lolly_scores

import twml

def get_multi_binary_class_metric_fn(metrics, classes=None, class_dim=1):
  """
@@ -1,7 +1,8 @@
from .hashing_utils import make_feature_id
import numpy as np

from twml.contrib.layers.hashing_discretizer import HashingDiscretizer
import numpy as np

from .hashing_utils import make_feature_id


class TFModelDiscretizerBuilder(object):
@@ -1,6 +1,5 @@
from twitter.deepbird.io.util import _get_feature_id

import numpy as np
from twitter.deepbird.io.util import _get_feature_id


def numpy_hashing_uniform(the_id, bin_idx, output_bits):
@@ -1,9 +1,10 @@
from .hashing_utils import make_feature_id, numpy_hashing_uniform

import numpy as np
import tensorflow.compat.v1 as tf

import twml

from .hashing_utils import make_feature_id, numpy_hashing_uniform


class TFModelWeightsInitializerBuilder(object):
  def __init__(self, num_bits):
@@ -1,26 +1,32 @@
# checkstyle: noqa
from datetime import datetime

import tensorflow.compat.v1 as tf
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
import tensorflow_hub as hub
from tensorflow.compat.v1 import logging
from tensorflow.python.estimator.export.export import (
  build_raw_serving_input_receiver_fn,
)
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
import tensorflow_hub as hub

from datetime import datetime
from tensorflow.compat.v1 import logging
from twitter.deepbird.projects.timelines.configs import all_configs

import twml
from twml.contrib.calibrators.common_calibrators import (
  build_percentile_discretizer_graph,
  calibrate_discretizer_and_export,
)
from twml.trainers import DataRecordTrainer
from twml.contrib.calibrators.common_calibrators import build_percentile_discretizer_graph
from twml.contrib.calibrators.common_calibrators import calibrate_discretizer_and_export
from .metrics import get_multi_binary_class_metric_fn
from .constants import TARGET_LABEL_IDX, PREDICTED_CLASSES

from .constants import PREDICTED_CLASSES, TARGET_LABEL_IDX
from .example_weights import add_weight_arguments, make_weights_tensor
from .lolly.data_helpers import get_lolly_logits
from .lolly.tf_model_initializer_builder import TFModelInitializerBuilder
from .lolly.reader import LollyModelReader
from .lolly.tf_model_initializer_builder import TFModelInitializerBuilder
from .metrics import get_multi_binary_class_metric_fn
from .tf_model.discretizer_builder import TFModelDiscretizerBuilder
from .tf_model.weights_initializer_builder import TFModelWeightsInitializerBuilder

import twml

def get_feature_values(features_values, params):
  if params.lolly_model_tsv:
@@ -1,19 +1,45 @@
import datetime
import os
from dataclasses import asdict

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import utils
import wandb

try:
  wandb_key = ...
  wandb.login(...)
  run = wandb.init(project='ptos_with_media',
                   group='new-split-trains',
                   notes='tweet text with only (num_media, precision_nsfw). on full train set, new split.',
                   entity='absv',
                   config=params,
                   name='tweet-text-w-nsfw-1.1',
                   sync_tensorboard=True)
except FileNotFoundError:
  print('Wandb key not found')
  run = wandb.init(mode='disabled')


from notebook_eval_utils import EvalConfig, SparseMultilabelEvaluator
from twitter.cuad.representation.models.optimization import create_optimizer
from twitter.cuad.representation.models.text_encoder import TextEncoder
from twitter.hmli.nimbus.modeling.feature_encoder import FeatureEncoder
from twitter.hmli.nimbus.modeling.feature_loader import BigQueryFeatureLoader
from twitter.hmli.nimbus.modeling.model_config import (
  EncodingType,
  Feature,
  FeatureType,
  Model,
)

physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
  tf.config.experimental.set_memory_growth(device, True)

from twitter.hmli.nimbus.modeling.model_config import FeatureType, EncodingType, Feature, Model, LogType
from twitter.hmli.nimbus.modeling.feature_loader import BigQueryFeatureLoader
from twitter.cuad.representation.models.text_encoder import TextEncoder
from twitter.cuad.representation.models.optimization import create_optimizer
from twitter.hmli.nimbus.modeling.feature_encoder import FeatureEncoder

import numpy as np
import pandas as pd
import utils

cat_names = [
  ...
]
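The try/except around wandb.init lets the notebook run without credentials by falling back to a disabled run. The same pattern, self-contained (the notebook's elided key lookup is replaced here by wandb's own login, and the project settings are placeholders):

import wandb

def init_wandb(**init_kwargs):
    try:
        wandb.login()                       # reads WANDB_API_KEY or ~/.netrc
        return wandb.init(**init_kwargs)
    except Exception:
        print('Wandb key not found')
        return wandb.init(mode='disabled')  # no-op run, the rest still executes

run = init_wandb(project='my-project', sync_tensorboard=True)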
@@ -75,7 +101,6 @@ params = {
  'model_type': 'twitter_multilingual_bert_base_cased_mlm',
  'mixed_precision': True,
}
params

def parse_labeled_data(row_dict):
  label = [row_dict.pop(l) for l in labels]
@@ -134,7 +159,9 @@ with mirrored_strategy.scope():
  )
  pr_auc = tf.keras.metrics.AUC(curve="PR", num_thresholds=1000, multi_label=True, from_logits=True)

  custom_loss = lambda y_true, y_pred: utils.multilabel_weighted_loss(y_true, y_pred, weights=pos_weight_tensor)
  def custom_loss(y_true, y_pred):
    return utils.multilabel_weighted_loss(y_true, y_pred, weights=pos_weight_tensor)

  optimizer = create_optimizer(
    init_lr=params["lr"],
    num_train_steps=(params["epochs"] * params["steps_per_epoch"]),
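This hunk replaces a lambda with an equivalent named function, which carries a readable __name__ in Keras logs and can be registered via custom_objects when the model is reloaded. utils.multilabel_weighted_loss itself is not shown; a hypothetical stand-in for a per-label weighted, from-logits multilabel loss:

import tensorflow as tf

pos_weight_tensor = tf.constant([2.0, 1.0, 5.0])  # hypothetical per-label weights

def custom_loss(y_true, y_pred):
    # Weighted multilabel BCE on logits -- a guess at what
    # utils.multilabel_weighted_loss computes, not its actual code.
    return tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(
            labels=tf.cast(y_true, tf.float32),
            logits=y_pred,
            pos_weight=pos_weight_tensor,
        )
    )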
@@ -154,25 +181,6 @@ model.weights
model.summary()
pr_auc.name

import getpass
import wandb
from wandb.keras import WandbCallback
try:
  wandb_key = ...
  wandb.login(...)
  run = wandb.init(project='ptos_with_media',
                   group='new-split-trains',
                   notes='tweet text with only (num_media, precision_nsfw). on full train set, new split.',
                   entity='absv',
                   config=params,
                   name='tweet-text-w-nsfw-1.1',
                   sync_tensorboard=True)
except FileNotFoundError:
  print('Wandb key not found')
  run = wandb.init(mode='disabled')
import datetime
import os

start_train_time = datetime.datetime.now()
print(start_train_time.strftime("%m-%d-%Y (%H:%M:%S)"))
checkpoint_path = os.path.join("...")
@@ -195,8 +203,6 @@ model.fit(train_ds, epochs=params["epochs"], validation_data=val_ds, callbacks=[
          steps_per_epoch=params["steps_per_epoch"],
          verbose=2)

import tensorflow_hub as hub

gs_model_path = ...
reloaded_keras_layer = hub.KerasLayer(gs_model_path)
inputs = tf.keras.layers.Input(name="tweet__core__tweet__text", shape=(1,), dtype=tf.string)
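hub.KerasLayer wraps an exported SavedModel so it can be called like any Keras layer. A sketch of rebuilding a scoring model around it; the real gs_model_path is elided in the notebook, so the path below is a placeholder:

import tensorflow as tf
import tensorflow_hub as hub

gs_model_path = "gs://my-bucket/exported_model"  # hypothetical path
encoder = hub.KerasLayer(gs_model_path, trainable=False)

inputs = tf.keras.layers.Input(name="tweet__core__tweet__text", shape=(1,), dtype=tf.string)
outputs = encoder(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)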
@@ -233,9 +239,6 @@ test_media_not_nsfw = test.filter(lambda x, y: tf.logical_and(tf.equal(x["has_me
for d in [test, test_only_media, test_only_nsfw, test_no_media, test_media_not_nsfw]:
  print(d.reduce(0, lambda x, _: x + 1).numpy())

from notebook_eval_utils import SparseMultilabelEvaluator, EvalConfig
from dataclasses import asdict

def display_metrics(probs, targets, labels=labels):
  eval_config = EvalConfig(prediction_threshold=0.5, precision_k=0.9)
  for eval_mode, y_mask in [("implicit", np.ones(targets.shape))]:
@@ -273,4 +276,4 @@ for name, x in [(name, m.pr_auc.to_string(index=False).strip().split("\n")) for
    print(y.strip(), end="\t")
  print(".")
for d in [test, test_only_media, test_only_nsfw, test_no_media, test_media_not_nsfw]:
  print(d.reduce(0, lambda x, _: x + 1).numpy())
  print(d.reduce(0, lambda x, _: x + 1).numpy())
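The reduce(0, lambda x, _: x + 1) idiom counts a tf.data.Dataset by folding over its elements, which forces a full pass. A minimal illustration, including the cardinality() alternative for statically known sizes:

import tensorflow as tf

ds = tf.data.Dataset.range(5).filter(lambda x: x % 2 == 0)

n = ds.reduce(0, lambda count, _: count + 1).numpy()  # full pass over ds
print(n)                         # 3

print(ds.cardinality().numpy())  # -2 (UNKNOWN_CARDINALITY: filter() hides the size)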
@@ -1,21 +1,19 @@
import glob
import os
import random

import kerastuner as kt
import math
import numpy as np
import pandas as pd
import random
import sklearn.metrics
import tensorflow as tf
import os
import glob

from tqdm import tqdm
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from google.cloud import storage
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tqdm import tqdm

physical_devices = tf.config.list_physical_devices('GPU')
physical_devices

tf.config.set_visible_devices([tf.config.PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')], 'GPU')
tf.config.get_visible_devices('GPU')
@@ -89,7 +87,7 @@ test_ds = test_ds.map(lambda x: preprocess_embedding_example(x, positive_label=p

if use_sens_prev_data:
  test_sens_prev_glob = f"{sens_prev_input_root}/test/tfrecord/*.tfrecord"
  test_sens_prev_files = tf.io.gfile.glob(test_sens_prev_glob)
  test_sens_prev_files = tf.io.gfile.glob(test_sens_prev_glob)

  if not len(test_sens_prev_files):
    raise ValueError(f"Did not find any eval files matching {test_sens_prev_glob}")
@@ -109,12 +107,12 @@ train_ds = train_ds.repeat()

if has_validation_data:
  eval_glob = f"{input_root}/validation/tfrecord/*.tfrecord"
  eval_files = tf.io.gfile.glob(eval_glob)
  eval_files = tf.io.gfile.glob(eval_glob)

  if use_sens_prev_data:
    eval_sens_prev_glob = f"{sens_prev_input_root}/validation/tfrecord/*.tfrecord"
    eval_sens_prev_files = tf.io.gfile.glob(eval_sens_prev_glob)
    eval_files = eval_files + eval_sens_prev_files
    eval_files = eval_files + eval_sens_prev_files


  if not len(eval_files):
@@ -428,7 +426,7 @@ ptAt50fmt = "%.4f" % ptAt50[1]
ptAt90fmt = "%.4f" % ptAt90[1]
aucFmt = "%.4f" % auc_precision_recall
plt.title(
  f"Keras (nsfw MU test)\nAUC={aucFmt}\np={ptAt50fmt} @ r=0.5\np={ptAt90fmt} @ r=0.9\nN_train={...}} ({...} pos), N_test={n_test} ({n_test_pos} pos)",
  f"Keras (nsfw MU test)\nAUC={aucFmt}\np={ptAt50fmt} @ r=0.5\np={ptAt90fmt} @ r=0.9\nN_train={...} ({...} pos), N_test={n_test} ({n_test_pos} pos)",
  size=20
)
plt.subplots_adjust(top=0.72)
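The fixed line drops a stray closing brace. In f-strings a single unmatched } is a syntax error and doubled braces emit literal braces, so the original line would not even parse (the ... placeholders above are the notebook's own elisions, left as-is):

n_test = 100
# f"N={n_test}}"   -> SyntaxError: f-string: single '}' is not allowed
print(f"N={n_test}")   # N=100
print(f"{{n_test}}")   # {n_test}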
@@ -1,14 +1,16 @@
from datetime import datetime
from functools import reduce
import os
import pandas as pd
import re
from sklearn.metrics import average_precision_score, classification_report, precision_recall_curve, PrecisionRecallDisplay
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
import re
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import (
  average_precision_score,
  classification_report,
  precision_recall_curve,
)
from sklearn.model_selection import train_test_split
from twitter.cuad.representation.models.optimization import create_optimizer
from twitter.cuad.representation.models.text_encoder import TextEncoder
@@ -1,10 +1,8 @@
from abc import ABC
import re

from toxicity_ml_pipeline.settings.hcomp_settings import TOXIC_35
from abc import ABC

import numpy as np

from toxicity_ml_pipeline.settings.hcomp_settings import TOXIC_35

TOXIC_35_set = set(TOXIC_35)
@@ -84,7 +82,7 @@ class DefaultENNoPreprocessor(DataframeCleaner):
    else:
      raise NotImplementedError

    if "filter_low_agreements" in kwargs and kwargs["filter_low_agreements"] == True:
    if "filter_low_agreements" in kwargs and kwargs["filter_low_agreements"] is True:
      df.drop(df[(df.agreement_rate <= 0.6)].index, axis=0, inplace=True)
      raise NotImplementedError
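The change swaps == True for is True, which is stricter: equality accepts truthy numerics, while identity matches only the bool singleton.

flag = 1
print(flag == True)  # True  -- numeric equality
print(flag is True)  # False -- not the bool singleton

flag = True
print(flag is True)  # True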
@@ -287,7 +287,7 @@ class ENLoaderWithSampling(ENLoader):
class I18nLoader(DataframeLoader):
  def __init__(self):
    super().__init__(project=...)
    from archive.settings.... import ACCEPTED_LANGUAGES, QUERY_SETTINGS
    from archive.settings import ACCEPTED_LANGUAGES, QUERY_SETTINGS

    self.accepted_languages = ACCEPTED_LANGUAGES
    self.query_settings = dict(QUERY_SETTINGS)
@@ -1,6 +1,10 @@
from importlib import import_module
import os
from importlib import import_module

import numpy as np
import pandas
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from toxicity_ml_pipeline.settings.default_settings_tox import (
  INNER_CV,
  LOCAL_DIR,
@@ -12,12 +16,6 @@ from toxicity_ml_pipeline.settings.default_settings_tox import (
)
from toxicity_ml_pipeline.utils.helpers import execute_command

import numpy as np
import pandas
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf


try:
  from transformers import AutoTokenizer, DataCollatorWithPadding
except ModuleNotFoundError:
@@ -1,14 +1,13 @@
import os

from toxicity_ml_pipeline.settings.default_settings_tox import LOCAL_DIR, MAX_SEQ_LENGTH

try:
  from toxicity_ml_pipeline.optim.losses import MaskedBCE
except ImportError:
  print('No MaskedBCE loss')
from toxicity_ml_pipeline.utils.helpers import execute_command

import tensorflow as tf

from toxicity_ml_pipeline.utils.helpers import execute_command

try:
  from twitter.cuad.representation.models.text_encoder import TextEncoder
@@ -102,7 +101,7 @@ def get_loss(loss_name, from_logits, **kwargs):
    multitask = kwargs.get("multitask", False)
    if from_logits or multitask:
      raise NotImplementedError
    print(f'Masked Binary Cross Entropy')
    print('Masked Binary Cross Entropy')
    return MaskedBCE()

  if loss_name == "inv_kl_loss":
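MaskedBCE's implementation is not part of this diff; a hypothetical sketch of what a masked binary cross-entropy can look like, treating negative labels as unlabeled positions excluded from the mean:

import tensorflow as tf

class MaskedBCE(tf.keras.losses.Loss):
    # Hypothetical: ignore positions where y_true < 0.
    def call(self, y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        mask = tf.cast(y_true >= 0.0, tf.float32)
        bce = tf.keras.backend.binary_crossentropy(tf.maximum(y_true, 0.0), y_pred)
        # Average only over the labeled positions.
        return tf.reduce_sum(bce * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)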
@@ -1,14 +1,16 @@
from collections import defaultdict
import os
from collections import defaultdict

from toxicity_ml_pipeline.settings.default_settings_tox import REMOTE_LOGDIR
from toxicity_ml_pipeline.settings.default_settings_abs import LABEL_NAMES
from toxicity_ml_pipeline.utils.absv_utils import parse_labeled_data
from toxicity_ml_pipeline.utils.helpers import compute_precision_fixed_recall, execute_command

from sklearn.metrics import average_precision_score, roc_auc_score
import tensorflow as tf
import wandb
from sklearn.metrics import average_precision_score, roc_auc_score
from toxicity_ml_pipeline.settings.default_settings_abs import LABEL_NAMES
from toxicity_ml_pipeline.settings.default_settings_tox import REMOTE_LOGDIR
from toxicity_ml_pipeline.utils.absv_utils import parse_labeled_data
from toxicity_ml_pipeline.utils.helpers import (
  compute_precision_fixed_recall,
  execute_command,
)


class NothingCallback(tf.keras.callbacks.Callback):
@@ -1,7 +1,7 @@
import tensorflow as tf
from keras.utils import tf_utils
from keras.utils import losses_utils
from keras import backend
from keras.utils import losses_utils, tf_utils


def inv_kl_divergence(y_true, y_pred):
  y_pred = tf.convert_to_tensor(y_pred)
@@ -1,8 +1,7 @@
from toxicity_ml_pipeline.load_model import reload_model_weights
from toxicity_ml_pipeline.utils.helpers import load_inference_func, upload_model

import numpy as np
import tensorflow as tf
from toxicity_ml_pipeline.load_model import reload_model_weights
from toxicity_ml_pipeline.utils.helpers import load_inference_func, upload_model


def score(language, df, gcs_model_path, batch_size=64, text_col="text", kw="", **kwargs):
@@ -1,6 +1,5 @@
import os


TEAM_PROJECT = "twttr-toxicity-prod"
try:
  from google.cloud import bigquery
@@ -16,7 +15,7 @@ else:
  CLIENT = None
  print("Issue at logging time", e)

TRAINING_DATA_LOCATION = f"..."
TRAINING_DATA_LOCATION = "..."
GCS_ADDRESS = "..."
LOCAL_DIR = os.getcwd()
REMOTE_LOGDIR = "{GCS_ADDRESS}/logs"
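Here the f-prefix is dropped from a literal with no placeholders, where it was a no-op. Note that the nearby REMOTE_LOGDIR context line keeps literal braces because it lacks an f-prefix; whether interpolation was intended there cannot be told from the diff:

GCS_ADDRESS = "gs://bucket"   # placeholder value
print(f"...")                 # no placeholders: identical to "..."
print("{GCS_ADDRESS}/logs")   # literal: {GCS_ADDRESS}/logs
print(f"{GCS_ADDRESS}/logs")  # interpolated: gs://bucket/logs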
@@ -1,14 +1,16 @@
import os
from datetime import datetime
from importlib import import_module
import os

import numpy as np
import tensorflow as tf
from toxicity_ml_pipeline.data.data_preprocessing import (
  DefaultENNoPreprocessor,
  DefaultENPreprocessor,
)
from toxicity_ml_pipeline.data.dataframe_loader import ENLoader, ENLoaderWithSampling
from toxicity_ml_pipeline.data.mb_generator import BalancedMiniBatchLoader
from toxicity_ml_pipeline.load_model import load, get_last_layer
from toxicity_ml_pipeline.load_model import get_last_layer, load
from toxicity_ml_pipeline.optim.callbacks import (
  AdditionalResultLogger,
  ControlledStoppingCheckpointCallback,
@@ -19,6 +21,8 @@ from toxicity_ml_pipeline.optim.schedulers import WarmUp
from toxicity_ml_pipeline.settings.default_settings_abs import GCS_ADDRESS as ABS_GCS
from toxicity_ml_pipeline.settings.default_settings_tox import (
  GCS_ADDRESS as TOX_GCS,
)
from toxicity_ml_pipeline.settings.default_settings_tox import (
  MODEL_DIR,
  RANDOM_SEED,
  REMOTE_LOGDIR,
@@ -26,10 +30,6 @@ from toxicity_ml_pipeline.settings.default_settings_tox import (
)
from toxicity_ml_pipeline.utils.helpers import check_gpu, set_seeds, upload_model

import numpy as np
import tensorflow as tf


try:
  from tensorflow_addons.optimizers import AdamW
except ModuleNotFoundError:
@@ -139,9 +139,9 @@ class Trainer(object):
    )
    print("------- Experiment name: ", experiment_name)
    self.logdir = (
      f"..."
      "..."
      if self.test
      else f"..."
      else "..."
    )
    self.checkpoint_path = f"{self.model_dir}/{experiment_name}"
@@ -3,11 +3,9 @@ import os
import random as python_random
import subprocess

from toxicity_ml_pipeline.settings.default_settings_tox import LOCAL_DIR

import numpy as np
from sklearn.metrics import precision_recall_curve

from toxicity_ml_pipeline.settings.default_settings_tox import LOCAL_DIR

try:
  import tensorflow as tf