Source code for mldas.explore.evaluation

__copyright__ = """
Machine Learning for Distributed Acoustic Sensing data (MLDAS)
Copyright (c) 2020, The Regents of the University of California,
through Lawrence Berkeley National Laboratory (subject to receipt of
any required approvals from the U.S. Dept. of Energy). All rights reserved.

If you have questions about your rights to use or distribute this software,
please contact Berkeley Lab's Intellectual Property Office at
IPO@lbl.gov.

NOTICE.  This Software was developed under funding from the U.S. Department
of Energy and the U.S. Government consequently retains certain rights.  As
such, the U.S. Government has been granted for itself and others acting on
its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the
Software to reproduce, distribute copies to the public, prepare derivative 
works, and perform publicly and display publicly, and to permit others to do so.
"""
__license__ = "Modified BSD license (see LICENSE.txt)"
__maintainer__ = "Vincent Dumont"
__email__ = "vincentdumont11@gmail.com"

import numpy
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from .prepare import set_creation

def decode_plot(model,datapath,img_size=100,vae=False,compare=False,adjust=False,discrepancy=0.1):
    """
    Show original and decoded images for 5 random images.
    """
    target = set_creation(datapath,img_size=img_size,nrand=5,adjust=adjust)
    model.eval()
    out, _ = model(target.float())
    plt.style.use('seaborn')
    plt.figure(figsize=(14,6*(1+int(compare))),dpi=80)
    for i in range(5):
        # Load and plot original image
        ax1 = plt.subplot(2*(1+int(compare)),5,i+1)
        ax1.imshow(target[i][0],cmap='viridis')
        ax1.set_title('Original image')
        # Normalize decoded image to the [0,1] range
        normout = out[i][0]-out[i][0].min()
        normout = normout/normout.max()
        # Plot decoded image
        ax2 = plt.subplot(2*(1+int(compare)),5,6+i)
        ax2.imshow(normout.data,cmap='viridis')
        ax2.set_title('Decoded image'+(' (reference)' if i==0 and compare else ''))
        if compare:
            diff = normout-target[i][0]
            accuracy = len(diff[numpy.array(abs(diff)<discrepancy)])/img_size**2*100
            # Plot difference between decoded and original image
            ax3 = plt.subplot(4,5,11+i)
            ax3.imshow((normout-target[i][0]).data,cmap='seismic',vmin=-1,vmax=1)
            ax3.set_title('%.2f%% accuracy'%accuracy)
            # Plot difference between decoded and reference images
            if i==0:
                refout = normout
            ax4 = plt.subplot(4,5,16+i)
            ax4.imshow(abs(normout-refout).data,cmap='OrRd')
            ax4.set_title('Decoded - Reference')
    plt.tight_layout()
    plt.show()
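
# A minimal usage sketch for decode_plot (the checkpoint path and data
# directory below are hypothetical; any trained autoencoder saved with
# torch.save would do):
#
#   import torch
#   model = torch.load('checkpoints/autoencoder.pt')
#   decode_plot(model, '/path/to/das/data', img_size=100, compare=True)
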
def embedding_plot(model,datapath,img_size=100,stride=10,sample_size=1,nrand=None,adjust=True,show_images=True):
    """
    Display latent representation in 2D space.
    """
    data = set_creation(datapath,img_size,stride,sample_size,nrand=nrand,adjust=adjust)
    model.eval()
    # Extract the 2D latent vectors from the trained model
    z = model(data[:nrand].float())[-1]
    z = z.data.cpu().numpy()
    plt.style.use('seaborn')
    fig, ax = plt.subplots(dpi=100)
    plt.scatter(z[:,0,0], z[:,0,1])
    if show_images:
        # Overlay each input image at its latent-space coordinates
        for i in range(len(z)):
            imagebox = OffsetImage(data[i,0], zoom=0.4)
            ab = AnnotationBbox(imagebox, (z[i,0,0], z[i,0,1]),frameon=False)
            ax.add_artist(ab)
    plt.xlabel('Latent variable 1')
    plt.ylabel('Latent variable 2')
    plt.tight_layout()
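
# A minimal usage sketch for embedding_plot, assuming a trained model whose
# last output is a 2D latent vector (paths are hypothetical):
#
#   import torch
#   model = torch.load('checkpoints/autoencoder.pt')
#   embedding_plot(model, '/path/to/das/data', img_size=100, nrand=50)
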
def epoch_recon(models,datapath,img_size=100,adjust=False,epochs=120,max_diff=0.1,step_size=0.01):
    """
    Display image reconstruction accuracy across epochs.
    """
    target = set_creation(datapath,img_size=img_size,nrand=1000,adjust=adjust) # Load 1000 images
    acc_size = numpy.arange(0,max_diff,step_size)                              # Define discrepancy ranges
    results = numpy.zeros((epochs,len(acc_size)))                              # Initialize results array (epoch vs. reconstruction accuracy)
    for epoch in range(epochs):                                                # Loop over epochs
        model_epoch = models[epoch+1]                                          # Load epoch model
        model_epoch.eval()                                                     # Set model to evaluation mode
        out, _ = model_epoch(target.float())                                   # Apply trained model to data
        for j in range(len(out)):                                              # Loop over all output data
            out[j][0] = (out[j][0]-out[j][0].min())/(out[j][0].max()-out[j][0].min()) # Normalize outputs
        # Calculate difference between original and output images
        diff = abs(out-target).reshape(len(out),img_size,img_size).data.numpy()
        # Count how many pixels fall in each discrepancy range
        acc = numpy.array([[len(var[numpy.where((i<=var)&(var<i+step_size))]) for var in diff] for i in acc_size])
        acc = acc/img_size**2*100                                              # Convert the values to percentages
        results[epoch] = numpy.mean(acc,axis=1)                                # Calculate mean percentage across all images
    plt.style.use('seaborn')                                                   # Set seaborn style
    fig = plt.figure(figsize=(10,6),dpi=80)                                    # Initialize figure
    ax1 = fig.add_axes([0.10,0.10,0.83,0.69])                                  # Main plot
    ax2 = fig.add_axes([0.95,0.10,0.03,0.69])                                  # Colorbar
    ax3 = fig.add_axes([0.10,0.82,0.83,0.15],sharex=ax1)                       # Histogram
    img = ax1.imshow(results.T[::-1],aspect='auto',cmap='summer',extent=[0,epochs,0,max_diff])
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Discrepancy threshold')
    plt.colorbar(img,label='Percentage of pixels',cax=ax2)                     # Plot colorbar
    y = [sum(results[i]) for i in range(epochs)]                               # Sum all percentages for each epoch
    x = numpy.arange(epochs)
    ax3.bar(x,y,width=1,align='edge',color='lightgrey')
    ax3.set_facecolor('white')
    ax3.set_ylim(min(y)-1,max(y)+1)
    ax3.set_title('Reconstruction accuracy')
    plt.setp(ax3.get_xticklabels(), visible=False)
    plt.show()
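
# A minimal usage sketch for epoch_recon, assuming per-epoch checkpoints
# collected in a dictionary keyed by epoch number, to match the
# models[epoch+1] lookup above (the file layout is hypothetical):
#
#   import torch
#   models = {i: torch.load('checkpoints/epoch%03i.pt'%i) for i in range(1,121)}
#   epoch_recon(models, '/path/to/das/data', img_size=100, epochs=120)
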
def label_2d_latent(model,data_loader,embeddings=False):
    """
    Scatter plot of 2D latent space with label-based color scheme.

    Parameters
    ----------
    model : :py:class:`torch.nn.Module`
      Trained model.
    data_loader : :py:class:`torch.utils.data.DataLoader`
      Input data to evaluate the trained model with.
    embeddings : :py:class:`bool`
      Whether to display the embeddings or not.
    """
    model.eval()
    plt.style.use('seaborn')
    fig, ax = plt.subplots(dpi=100)
    for batch_idx, (data,target) in enumerate(data_loader):
        data = data.float()
        # Flatten the images and run them through the model
        z, recon_batch, mu, logvar = model(data.view(-1,numpy.prod(data.shape[-2:])))
        z = z.data.cpu().numpy()
        # Color each point by its label
        plt.scatter(z[:,0],z[:,1],s=10,c=target,cmap='cool',alpha=0.5)
        if embeddings:
            # Overlay each input image at its latent-space coordinates
            for i,img in enumerate(data):
                imagebox = OffsetImage(data[i,0], zoom=0.4)
                ab = AnnotationBbox(imagebox, (z[i,0], z[i,1]),frameon=False)
                ax.add_artist(ab)
    plt.xlabel('Latent variable 1')
    plt.ylabel('Latent variable 2')
    plt.tight_layout()
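
# A minimal usage sketch for label_2d_latent, assuming a labeled image
# dataset wrapped in a standard DataLoader (the tensors images and labels
# are hypothetical stand-ins):
#
#   from torch.utils.data import DataLoader, TensorDataset
#   loader = DataLoader(TensorDataset(images, labels), batch_size=64)
#   label_2d_latent(model, loader, embeddings=False)
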
def success_rate(model,target,img_size,discrepancy_threshold,success_threshold=70):
    """
    Compute the percentage of reconstructed images for which at least
    ``success_threshold`` percent of the pixels are reconstructed within the
    given discrepancy threshold (e.g. 90% fidelity for a threshold of 0.1).
    """
    # Set model to evaluation mode
    model.eval()
    # Apply trained model to data
    out, _ = model(target.float())
    # Loop over all output data
    for i in range(len(out)):
        # Normalize outputs
        out[i][0] = (out[i][0]-out[i][0].min())/(out[i][0].max()-out[i][0].min())
    # Calculate difference between original and output images
    diff = abs(out-target).reshape(len(out),img_size,img_size).data.numpy()
    # Percentage of well-reconstructed pixels in each image
    acc = numpy.array([len(var[numpy.where(var<discrepancy_threshold)]) for var in diff])
    acc = acc/img_size**2*100
    # Calculate success rate
    success_rate = sum(i>success_threshold for i in acc)/len(acc)*100
    # Display the success rate, the success threshold above which a single
    # image is considered well reconstructed, and the reconstruction fidelity
    # (1 minus the discrepancy threshold) above which a single pixel is
    # considered well reconstructed
    print('%.2f%% of the images have'%success_rate,
          '%i%% of their pixels with'%success_threshold,
          '%i%% reconstruction fidelity'%((1-discrepancy_threshold)*100))
    return out,acc
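
# A minimal usage sketch for success_rate, where target is a tensor of
# images such as returned by set_creation (the data path is hypothetical):
#
#   target = set_creation('/path/to/das/data', img_size=100, nrand=1000)
#   out, acc = success_rate(model, target, img_size=100,
#                           discrepancy_threshold=0.1, success_threshold=70)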