community[patch]: Databricks SerDe uses cloudpickle instead of pickle (#18607)

- **Description:** The Databricks SerDe now uses cloudpickle instead of pickle
to serialize and deserialize the user-defined function `transform_input_fn`.
pickle serializes functions by reference, so a function defined in `__main__`
cannot be loaded in another process; cloudpickle serializes such functions by value.
- **Dependencies:** cloudpickle>=2.0.0

Added a unit test.
This commit is contained in:
Liang Zhang
2024-03-05 18:04:45 -08:00
committed by GitHub
parent f3e28289f6
commit 81985b31e6
4 changed files with 33 additions and 11 deletions

View File

@@ -1,5 +1,4 @@
import os
import pickle
import re
import warnings
from abc import ABC, abstractmethod
@@ -225,7 +224,12 @@ def _is_hex_string(data: str) -> bool:
def _load_pickled_fn_from_hex_string(data: str) -> Callable:
"""Loads a pickled function from a hexadecimal string."""
try:
return pickle.loads(bytes.fromhex(data))
import cloudpickle
except Exception as e:
raise ValueError(f"Please install cloudpickle>=2.0.0. Error: {e}")
try:
return cloudpickle.loads(bytes.fromhex(data))
except Exception as e:
raise ValueError(
f"Failed to load the pickled function from a hexadecimal string. Error: {e}"
@@ -235,7 +239,12 @@ def _load_pickled_fn_from_hex_string(data: str) -> Callable:
def _pickle_fn_to_hex_string(fn: Callable) -> str:
"""Pickles a function and returns the hexadecimal string."""
try:
return pickle.dumps(fn).hex()
import cloudpickle
except Exception as e:
raise ValueError(f"Please install cloudpickle>=2.0.0. Error: {e}")
try:
return cloudpickle.dumps(fn).hex()
except Exception as e:
raise ValueError(f"Failed to pickle the function: {e}")