__copyright__="""Machine Learning for Distributed Acoustic Sensing data (MLDAS)Copyright (c) 2020, The Regents of the University of California,through Lawrence Berkeley National Laboratory (subject to receipt ofany required approvals from the U.S. Dept. of Energy). All rights reserved.If you have questions about your rights to use or distribute this software,please contact Berkeley Lab's Intellectual Property Office atIPO@lbl.gov.NOTICE. This Software was developed under funding from the U.S. Departmentof Energy and the U.S. Government consequently retains certain rights. Assuch, the U.S. Government has been granted for itself and others acting onits behalf a paid-up, nonexclusive, irrevocable, worldwide license in theSoftware to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit others to do so."""__license__="Modified BSD license (see LICENSE.txt)"__maintainer__="Vincent Dumont"__email__="vincentdumont11@gmail.com"# Systemsimportosimportglobimportrandom# Externalsimporth5pyimportnumpyimporttorchfromcollectionsimportOrderedDictfromtorchvisionimporttransformsfromPILimportImage
[docs]defhdf5read(fname,key=['dsi30','data','variable']):""" Read input raw DAS file. The file is supposed to have an HDF5 format and the actual data stored in a group that have one of the following names: ``dsi30``, ``data`` or ``variable``. Parameters ---------- fname : :py:class:`str` Full path to the HDF5 file key : :py:class:`str` or :py:class:`list` Group's key name where the data are stored. If not specify, a list of keys will be looped over. Returns ------- data : :py:class:`numpy.ndarray` Raw data """f=h5py.File(fname,'r')iftype(key)==str:data=f[f[key][0,0]]else:assertany(keynameinf.keys()forkeynameinkey),'No acceptable key found in input HDF5 file.'forkeynameinf.keys():ifkeynameinkey:data=f[f['%s/dat'%keyname][0,0]]breakreturndata
[docs]defload_model(fname,model):""" Load saved model's parameter dictionary to initialized model. The function will remove any ``.module`` string from parameter's name. Parameters ---------- fname : :py:class:`str` Path to saved model model : :py:class:`torch.nn.Module` Initialized network network architecture Returns ------- model : :py:class:`torch.nn.Module` Up-to-date neural network model """checkpoint=torch.load(fname,map_location=lambdastorage,loc:storage)state_dict=checkpoint['model']new_state_dict=OrderedDict()fork,vinstate_dict.items():ifk.startswith('module.'):k=k[7:]new_state_dict[k]=vcheckpoint['model']=new_state_dictmodel.load_state_dict(checkpoint['model'])returnmodel
[docs]defsave_data(data,dir_dst,fname):""" Save raw data region as JPG image. Parameters ---------- data : :py:class:`numpy.ndarray` Input raw data dir_dst : :py:class:`str` Path to save image fname : :py:class:`str` Output file name """img=(data-data.min())/(data.max()-data.min())img=Image.fromarray(numpy.uint8(img*255))img.save('%s/%s.jpg'%(dir_dst,fname))
[docs]defload_image(data,rgb=False,to_numpy=False,squeeze=False):""" Load input data as tensor image. Input data can be either in the form of JPG image or raw data region and will be converted into numpy.uint8 format, then either RGB or grayscale image. Parameters ---------- data : :py:class:`str` or :py:class:`numpy.ndarray` Input data, either path saved image, or raw data array. rgb : :py:class:`bool` Convert data to RGB data image to_numpy : :py:class:`bool` Convert data to numpy array squeeze : :py:class:`bool` Squeeze data to remove any dimensions equal to 1. """iftype(data)==str:data=os.path.expandvars(data)image=Image.open(data)else:image=(data-data.min())/(data.max()-data.min())image=Image.fromarray(numpy.uint8(image*255))ifrgb:image=transforms.ToTensor()(image.convert("RGB")).view(1,3,*image.size)else:image=transforms.ToTensor()(image.convert("L")).view(1,1,*image.size)ifsqueeze:image=torch.squeeze(image)ifto_numpy:image=image.numpy()returnimage
[docs]defload_bulk(dname,size,rgb=False,to_numpy=False,labeled=True):""" Load multiple images from directory. Parameters ---------- dname : :py:class:`str` Path to directory where images are saved. size : :py:class:`int` Number of images to be loaded rgb : :py:class:`bool` Convert data to RGB data image to_numpy : :py:class:`bool` Convert data to numpy array labeled : :py:class:`bool` Whether images are saved in a labeled structure (same structure than for :py:class:`torchvision.datasets.ImageFolder` class) or directly in target repository. Returns ------- tensors : :py:class:`torch.Tensor` or :py:class:`numpy.ndarray` Output list of loaded images either in tensor or numpy array formats. labels : :py:class:`torch.Tensor` or :py:class:`numpy.ndarray` Output list of labels either in tensor or numpy array formats. """fname=os.path.expandvars(dname)iflabeled:all_labels=[labelforlabelinos.listdir(fname+'/train/')iflabel!='.DS_Store']assertsize%len(all_labels)==0,'Requested bulk size not multiple of number of labels.'labels,tensors=[],[]fori,labelinenumerate(all_labels):file_list=glob.glob('%s/*/%s/*.jpg'%(dname,label))forfnameinrandom.sample(file_list,size//len(all_labels)):tensors.append(load_image(fname,rgb=rgb))labels+=([i]*(size//len(all_labels)))tensors=torch.stack(tensors).squeeze(dim=1)ifto_numpy:tensors=torch.squeeze(tensors).numpy()idxs=random.sample(range(size),size)returntensors[idxs],numpy.array(labels)[idxs]else:tensors=[]file_list=glob.glob('%s/*.jpg'%dname)forfnameinrandom.sample(file_list,size):tensors.append(load_image(fname,rgb=rgb))tensors=torch.stack(tensors).squeeze(dim=1)ifto_numpy:tensors=torch.squeeze(tensors).numpy()returntensors,numpy.array([0]*(size))