'''
Legacy methods for handling old pickles (eg Python 2 pickles). Included for backwards
compatibility, but not imported into Sciris by default.
'''
import types
import traceback
import gzip as gz
import numpy as np
import pickle as pkl
import datetime as dt
import copyreg as cpreg
from io import BytesIO as IO
from contextlib import closing
from . import sc_fileio as scf
from . import sc_utils as scu
from . import sc_odict as sco
##############################################################################
#%% Python 2 legacy support
##############################################################################
# Class names that StringUnpickler (in _loadobj2to3) does not try to import;
# it substitutes scf.Empty for them instead
not_string_pickleable = [
    'datetime',
    'BytesIO',
]

# Class names that BytesUnpickler (in _loadobj2to3) imports for real; every
# other class name is swapped for a Placeholder
byte_objects = [
    'datetime',
    'BytesIO',
    'odict',
    'spreadsheet',
    'blobject',
]
def loadobj2or3(filename=None, filestring=None, recursionlimit=None, **kwargs): # pragma: no cover
    '''
    Try to load as a (Sciris-saved) Python 3 pickle; if that fails, try to load
    as a Python 2 pickle. For legacy support only.

    For available keyword arguments, see sc.load().

    Args:
        filename (str): the name of the file to load
        filestring (str): alternatively, specify an already-loaded bytestring (only used by the Python 2 fallback)
        recursionlimit (int): how deeply to parse objects before failing (default 1000; only used by the Python 2 fallback)
        kwargs (dict): passed to the Python 3 loader only

    Returns:
        The loaded object, from whichever loader succeeded
    '''
    try:
        output = scf.loadobj(filename=filename, **kwargs)
    except Exception: # Deliberately broad so any load failure triggers the fallback, but Ctrl+C and SystemExit still propagate
        output = _loadobj2to3(filename=filename, filestring=filestring, recursionlimit=recursionlimit)
    return output
def _loadobj2to3(filename=None, filestring=None, recursionlimit=None): # pragma: no cover
    '''
    Used by loadobj2or3() to load Python2 objects in Python3 if all other
    loading methods fail. Uses a recursive approach, so can set a recursion limit.

    The gzipped pickle is read twice: once with ``encoding='latin1'`` (classes
    listed in ``not_string_pickleable`` are stubbed out) and once with
    ``encoding='bytes'`` (every class not listed in ``byte_objects`` is replaced
    by a placeholder). Datetime values and blobject payloads from the second
    load are then copied into the first load, which is what gets returned.

    WARNING: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.

    Args:
        filename (str): path of the gzipped pickle to load (takes precedence over filestring)
        filestring (bytes): alternatively, the gzipped pickle as an in-memory bytestring
        recursionlimit (int): number of substitution calls allowed before further descent is skipped with a warning (default 1000)

    Returns:
        The string-unpickled object, with values substituted from the bytes-unpickled one

    Raises:
        Exception: if neither a filename nor a filestring is supplied
    '''

    class Placeholder():
        ''' Replace these corrupted classes with properly loaded ones '''
        def __init__(self, *args):
            return

        def __setstate__(self, state):
            if isinstance(state,dict):
                self.__dict__ = state
            else:
                self.state = state
            return

    class StringUnpickler(pkl.Unpickler):
        ''' Unpickler that swaps unloadable classes for scf.Empty '''
        def find_class(self, module, name, verbose=False):
            if verbose: print('Unpickling string module %s , name %s' % (module, name))
            if name in not_string_pickleable:
                return scf.Empty
            try:
                output = pkl.Unpickler.find_class(self,module,name)
            except Exception as E:
                print('Warning, string unpickling could not find module %s, name %s: %s' % (module, name, str(E)))
                output = scf.Empty
            return output

    class BytesUnpickler(pkl.Unpickler):
        ''' Unpickler that only imports the classes in byte_objects; everything else becomes a Placeholder '''
        def find_class(self, module, name, verbose=False):
            if verbose: print('Unpickling bytes module %s , name %s' % (module, name))
            if name not in byte_objects:
                return Placeholder
            try:
                output = pkl.Unpickler.find_class(self,module,name)
            except Exception as E:
                print('Warning, bytes unpickling could not find module %s, name %s: %s' % (module, name, str(E)))
                output = Placeholder
            return output

    def decode_key(key):
        ''' Convert a bytes key (from the bytes unpickler) to str; leave any other key untouched '''
        if isinstance(key, (bytes, bytearray)):
            return key.decode('latin1')
        return key

    def recursive_substitute(obj1, obj2, track=None, recursionlevel=0, recursionlimit=None):
        ''' Copy datetimes and blobject payloads from obj2 (bytes load) into obj1 (string load); returns the updated call counter '''
        if recursionlimit is None: # Recursion limit
            recursionlimit = 1000 # Better to die here than hit Python's recursion limit

        def recursion_warning(count, obj1, obj2):
            output = 'Warning, internal recursion depth exceeded, aborting: depth=%s, %s -> %s' % (count, type(obj1), type(obj2))
            return output

        # NOTE: the counter is passed back through the return value, so it grows
        # with every call made so far rather than with the current nesting depth
        recursionlevel += 1

        if track is None:
            track = [] # Path of keys leading to the current object; not read here, but handy when debugging

        if isinstance(obj1, scf.Blobject): # Handle blobjects (usually spreadsheets)
            obj1.blob  = obj2.__dict__[b'blob']
            obj1.bytes = obj2.__dict__[b'bytes']

        if isinstance(obj2, dict): # Handle dictionaries
            for k,v in obj2.items():
                if isinstance(v, dt.datetime):
                    k = decode_key(k) # Keys that are already str must not be decoded again
                    if isinstance(obj1, dict):
                        obj1[k] = v # obj1 mirrors obj2, so the value belongs in the matching item, not in an attribute
                    else:
                        setattr(obj1, k, v)
                elif isinstance(v, dict) or hasattr(v,'__dict__'):
                    k = decode_key(k)
                    track2 = track.copy()
                    track2.append(k)
                    if recursionlevel<=recursionlimit:
                        recursionlevel = recursive_substitute(obj1[k], v, track2, recursionlevel, recursionlimit)
                    else:
                        print(recursion_warning(recursionlevel, obj1, obj2))
        else: # Handle everything else via its attribute dictionary
            for k,v in obj2.__dict__.items():
                if isinstance(v, dt.datetime):
                    setattr(obj1, decode_key(k), v)
                elif isinstance(v,dict) or hasattr(v,'__dict__'):
                    k = decode_key(k)
                    track2 = track.copy()
                    track2.append(k)
                    if recursionlevel<=recursionlimit:
                        recursionlevel = recursive_substitute(getattr(obj1,k), v, track2, recursionlevel, recursionlimit)
                    else:
                        print(recursion_warning(recursionlevel, obj1, obj2))
        return recursionlevel

    def loadintostring(fileobj):
        ''' Unpickle with latin1 decoding; on failure, return a makefailed() object instead of raising '''
        unpickler1 = StringUnpickler(fileobj, encoding='latin1')
        try:
            stringout = unpickler1.load()
        except Exception as E:
            print('Warning, string pickle loading failed: %s' % str(E))
            exception = traceback.format_exc() # Grab the traceback stack
            stringout = scf.makefailed(module_name='String unpickler failed', name='n/a', error=E, exception=exception)
        return stringout

    def loadintobytes(fileobj):
        ''' Unpickle with strings kept as bytes; on failure, return a makefailed() object instead of raising '''
        unpickler2 = BytesUnpickler(fileobj,  encoding='bytes')
        try:
            bytesout = unpickler2.load()
        except Exception as E:
            print('Warning, bytes pickle loading failed: %s' % str(E))
            exception = traceback.format_exc() # Grab the traceback stack
            bytesout = scf.makefailed(module_name='Bytes unpickler failed', name='n/a', error=E, exception=exception)
        return bytesout

    # Load either from file or from string; the stream is opened afresh for each of the two passes
    if filename:
        with gz.GzipFile(filename) as fileobj:
            stringout = loadintostring(fileobj)
        with gz.GzipFile(filename) as fileobj:
            bytesout = loadintobytes(fileobj)

    elif filestring:
        with closing(IO(filestring)) as output:
            with gz.GzipFile(fileobj=output, mode='rb') as fileobj:
                stringout = loadintostring(fileobj)
        with closing(IO(filestring)) as output:
            with gz.GzipFile(fileobj=output, mode='rb') as fileobj:
                bytesout = loadintobytes(fileobj)
    else:
        errormsg = 'You must supply either a filename or a filestring for loadobj() or loadstr(), respectively'
        raise Exception(errormsg)

    # Actually do the load, with correct substitution
    recursive_substitute(stringout, bytesout, recursionlevel=0, recursionlimit=recursionlimit)
    return stringout
##############################################################################
#%% Twisted pickling methods
##############################################################################
# NOTE: The code below is part of the Twisted package, and is included
# here to allow functools.partial() objects (among other things) to be
# pickled; they are not for public consumption. --CK
# From: twisted/persisted/styles.py
# -*- test-case-name: twisted.test.test_persisted -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# Alias for pickle.PicklingError, part of the Twisted-derived code; nothing else in this section references it
_UniversalPicklingError = pkl.PicklingError
def _pickleMethod(method):
    '''
    Reduce a bound method to a picklable ``(callable, args)`` pair.

    The args are the method's name, the instance it is bound to, and that
    instance's class, in the order expected by _unpickleMethod().
    '''
    name = method.__name__
    instance = method.__self__
    rebuild_args = (name, instance, instance.__class__)
    return (_unpickleMethod, rebuild_args)
def _methodFunction(classObject, methodName):
methodObject = getattr(classObject, methodName)
return methodObject
def _unpickleMethod(im_name, im_self, im_class):
    '''
    Rebuild a method from the pieces produced by _pickleMethod().

    Args:
        im_name (str): name of the method
        im_self (object): instance the method was bound to, or None for no instance
        im_class (type): class on which to look the method up

    Returns:
        The attribute ``im_name`` of ``im_class`` if ``im_self`` is None, otherwise
        that function bound to ``im_self``

    Raises:
        AttributeError: if neither ``im_class`` nor the instance's own class has the method
    '''
    if im_self is None: # Nothing to bind to: hand back the class attribute as-is
        return getattr(im_class, im_name)
    try:
        methodFunction = _methodFunction(im_class, im_name)
    except AttributeError: # pragma: no cover
        # The recorded class lacks the method; retry once using the instance's actual class
        if im_self.__class__ is im_class:
            raise
        return _unpickleMethod(im_name, im_self, im_self.__class__)
    return types.MethodType(methodFunction, im_self)
# Register _pickleMethod as the reducer for bound methods, so pickling one stores the
# (_unpickleMethod, args) pair it returns. Per the copyreg documentation, the third
# (constructor) argument is a legacy parameter that only has to be callable.
cpreg.pickle(types.MethodType, _pickleMethod, _unpickleMethod)
# Legacy support for loading Sciris <1.0 objects; may be removed in future
# (presumably old pickles refer to these un-prefixed names -- TODO confirm)
pickleMethod = _pickleMethod
unpickleMethod = _unpickleMethod
##############################################################################
#%% Legacy data frame class
##############################################################################
[docs]class legacy_dataframe(object): # pragma: no cover
'''
This legacy dataframe is maintained solely to allow loading old files.
**Example**::
import sciris as sc
from sciris import sc_legacy as scl
remapping = {'sciris.sc_dataframe.dataframe':scl.legacy_dataframe}
old = sc.load('my-old-file.obj', remapping=remapping)
| Version: 2020nov29
| Migrated to ``sc_legacy`` in version 2.0.0.
'''
def __init__(self, cols=None, data=None, nrows=None):
    ''' Create the cols/data attributes, then let make() populate them from the inputs '''
    self.cols = self.data = None # Placeholders so both attributes exist before make() runs
    self.make(cols=cols, data=data, nrows=nrows)
    return
def __repr__(self, spacing=2):
''' spacing = space between columns '''
if not self.cols: # No keys, give up
return '<empty dataframe>'
else: # Go for it
outputlist = sco.odict()
outputformats = sco.odict()
# Gather data
nrows = self.nrows
for c,col in enumerate(self.cols):
outputlist[col] = list()
maxlen = len(col) # Start with length of column name
if nrows:
for val in self.data[:,c]:
output = scu.flexstr(val)
maxlen = max(maxlen, len(output))
outputlist[col].append(output)
outputformats[col] = '%'+'%i'%(maxlen+spacing)+'s'
ndigits = (np.floor(np.log10(max(1,nrows)))+1) # Don't allow 0 rows
indformat = '%%%is' % ndigits # Choose the right number of digits to print
# Assemble output
output = indformat % '' # Empty column for index
for col in self.cols: # Print out header
output += outputformats[col] % col
output += '\n'
for ind in range(nrows): # Loop over rows to print out
output += indformat % scu.flexstr(ind)
for col in self.cols: # Print out data
output += outputformats[col] % outputlist[col][ind]
if ind<nrows-1: output += '\n'
return output
@property
def ncols(self):
    ''' Get the number of columns in the data frame, checking that the headers and the data array agree '''
    n_named  = len(self.cols)
    n_stored = self.data.shape[1]
    if n_named == n_stored:
        return n_named
    errormsg = 'Dataframe corrupted: %s columns specified but %s in data' % (n_named, n_stored)
    raise Exception(errormsg)
@property
def nrows(self):
    ''' Get the number of rows in the data frame; returns 0 if the data has no usable shape (e.g. it is None) '''
    try:
        return self.data.shape[0]
    except Exception: # If it didn't work, probably because it's empty; KeyboardInterrupt/SystemExit still propagate
        return 0
@property
def shape(self):
    ''' Equivalent to the shape of the data array, minus the headers: a (nrows, ncols) tuple '''
    n_rows = self.nrows
    n_cols = self.ncols
    return (n_rows, n_cols)
def make(self, cols=None, data=None, nrows=None):
    '''
    Creates a dataframe from the supplied input data.

    **Usage examples**::

        df = sc.dataframe()
        df = sc.dataframe(['a','b','c'])
        df = sc.dataframe(['a','b','c'], nrows=2)
        df = sc.dataframe([['a','b','c'],[1,2,3],[4,5,6]])
        df = sc.dataframe(['a','b','c'], [[1,2,3],[4,5,6]])
        df = sc.dataframe(cols=['a','b','c'], data=[[1,2,3],[4,5,6]])

    Args:
        cols (list/dict/DataFrame): column names; or a dict of column name -> column values; or a pandas dataframe; or a 2D list whose first row is the header
        data (list/array): the data, as a 1D or 2D array-like; ignored if cols is a dict
        nrows (int): number of rows to create when no data are supplied (default 0)

    Raises:
        Exception: if cols is not list-like, or the data cannot be matched to the columns
    '''
    import pandas as pd # Optional import

    # Handle columns
    if nrows is None:
        nrows = 0
    if cols is None and data is None:
        cols = list()
        data = np.zeros((int(nrows), 0), dtype=object) # Object allows more than just numbers to be stored
    elif cols is None and data is not None: # Shouldn't happen, but if it does, swap inputs
        cols = data
        data = None

    if isinstance(cols, pd.DataFrame): # It's actually a Pandas dataframe
        self.pandas(df=cols)
        return # We're done

    # A dictionary is supplied: assume keys are columns, and the rest is the data
    columnwise = False # Whether a 2D data array has one entry per column rather than per row
    if isinstance(cols, dict):
        data = list(cols.values())
        cols = list(cols.keys())
        columnwise = True

    elif not scu.checktype(cols, 'listlike'):
        errormsg = 'Inputs to dataframe must be list, tuple, or array, not %s' % (type(cols))
        raise Exception(errormsg)

    # Handle data
    if data is None:
        if np.ndim(cols)==2 and np.shape(cols)[0]>1: # It's a 2D array with more than one row: treat first as header
            data = scu.dcp(cols[1:])
            cols = scu.dcp(cols[0])
        else:
            data = np.zeros((int(nrows),len(cols)), dtype=object) # Just use default

    data = np.array(data, dtype=object)
    if data.ndim == 2:
        if columnwise:
            # Dict values are columns, so always transpose; relying on the shape check
            # below would leave the data the wrong way round when nrows == ncols
            data = data.transpose()
    elif data.ndim == 1:
        if len(cols)==1: # A single column, use the data to populate the rows
            data = np.reshape(data, (len(data),1))
        elif len(data)==len(cols): # A single row, use the data to populate the columns
            data = np.reshape(data, (1,len(data)))
        else:
            errormsg = 'Dimension of data can only be 1 if there is 1 column, not %s' % len(cols)
            raise Exception(errormsg)
    else:
        errormsg = 'Dimension of data must be 1 or 2, not %s' % data.ndim
        raise Exception(errormsg)

    # Make sure the second axis is the column axis, transposing if the data arrived the other way round
    if data.shape[1]==len(cols):
        pass
    elif data.shape[0]==len(cols):
        data = data.transpose()
    else:
        errormsg = 'Number of columns (%s) does not match array shape (%s)' % (len(cols), data.shape)
        raise Exception(errormsg)

    # Store it
    self.cols = list(cols)
    self.data = data
    return