"""
Postprocess XSHEAR output
"""
from __future__ import print_function, division
import math
import fitsio as fio
import numpy as np
import pandas as pd
import sys
import copy
from .ioshear import read_single_bin, read_multiple_bin
from .xwrap import sheared_tags, create_infodict
from ..paths import fullpaths, params
from ..tools.catalogs import match_endian, to_pandas
BADVAL = -9999.0
def _get_nbin(data):
"""obtains number of radial bins from **xread** output"""
return len(data[0, 0, :])
def redges(rmin, rmax, nbin):
"""
Calculates nominal edges and centers for logarithmic radial bins (base-10 logarithm)
Edges and areas are exact; "center" values are estimated as the circumference-weighted
mean radius
Parameters
----------
rmin : float
inner edge
rmax : float
outer edge
nbin : int
number of bins
Returns
-------
np.array, np.array, np.array
centers, edges, areas
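Notes
-----
With :math:`r_1` and :math:`r_2` the edges of a bin, the "center" returned here is the
circumference-weighted mean radius (this is what the code below evaluates):

.. math::
    \\bar{r} = \\frac{2}{3}\\,\\frac{r_2^3 - r_1^3}{r_2^2 - r_1^2}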
"""
edges = np.logspace(math.log10(rmin), math.log10(rmax), nbin + 1,
endpoint=True)
cens = np.array([(edges[i + 1] ** 3. - edges[i] ** 3.) * 2. / 3. /
(edges[i + 1] ** 2. - edges[i] ** 2.)
for i, edge in enumerate(edges[:-1])])
areas = np.array([np.pi * (edges[i + 1] ** 2. - edges[i] ** 2.)
for i, val in enumerate(edges[:-1])])
return cens, edges, areas
class StackedProfileContainer(object):
def __init__(self, info, data, labels, ncen, lcname=None, metadata=None, metatags=None, Rs=None, weights=None, shear=False, **kwargs):
"""
Object-oriented interface for stacked shear and :math:`\\Delta\\Sigma` profiles calculated via **xshear**
**Features**
* Jackknife based covariance estimation
* Calculate *shear*, *DeltaSigma*, and other averaged quantities
* Simple arithmetic with shear profiles: :code:`+, -, *, /`
* Integration with *Metacalibration* style selection responses
**Overview**
The lensing profile is calculated in two stages:
* First, for each Jackknife-region defined by the :code:`labels`
(that is, for each all-except-one selection of regions) the lensing profile is calculated and stored in
:code:`self.*_sub` (these are the JK-subprofiles).
* Using these JK-subprofiles, the mean lensing profile and the corresponding covariance
are calculated via blockwise omit-one Jackknife resampling.
In practice both of these steps are performed when calling **self.prof_maker()**, and the results are
stored in the variables below. The default quantity calculated is the **DeltaSigma**::
self.rr: radial centers
self.dst: E-mode lensing signal
self.dst_err: error for E-mode lensing signal
self.dst_cov: covariance for E-mode lensing signal
self.dsx: B-mode lensing signal
self.dsx_err: error for B-mode lensing signal
self.dsx_cov: covariance for B-mode lensing signal
A benefit of this two-step method is that it allows for the quick calculation of errors on the
**differences**, **ratios** and other transformations of shear profiles.
In particular, the **self.composite** function allows four basic operations :code:`+, -, *, /`
between shear profiles, while the **self.multiply** function allows :code:`\*, /`
with scalars or 1-D arrays (in which case the values are broadcast against the JK-subprofiles)
E.g. to calculate :math:`\\Delta\\Sigma_3 = \\Delta\\Sigma_2 - \\Delta\\Sigma_1`,
we only need to compute the differences of JK-subprofiles and
then re-compute the JK-mean, and JK-covariance.
In practice the above operation is achieved by::
# the operation is performed in-place, hence the deepcopy
prof3 = copy.deepcopy(prof2)
prof3.composite(prof1, operation="-")
Changing the physical quantity computed by **self.prof_maker()** is possible by modifying the
columns used by the stacking algorithm. The default values used for **DeltaSigma** are::
# default values for stacked DeltaSigma
self.dst_nom = 4
self.dsx_nom = 5
self.dst_denom = 6
self.dsx_denom = 7
self.e1_nom = 10
self.e2_nom = 11
self.meta_denom = 3
self.meta_prefac = 2
To calculate **shear** one would need to change the above to::
self.dst_denom = 8
self.dsx_denom = 9
self.meta_denom = 3
self.meta_prefac = 3
Further information on the column positions is found in the **xread** documentation...
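A minimal end-to-end sketch (assuming :code:`info`, :code:`data` and :code:`labels`
are already loaded from the **xread** output, and :code:`ncen` is the total number
of JK regions)::

    prof = StackedProfileContainer(info, data, labels, ncen=100)
    prof.prof_maker()
    print(prof.rr, prof.dst, prof.dst_err)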
Parameters
----------
info : np.ndarray
:code:`info` table from **xread**
data : np.ndarray
:code:`data` table from **xread**
labels : np.array
JK-region labels
ncen : int
number of JK regions in total
lcname : str
tag string
metadata : list of np.ndarray
sheared versions of :code:`data`
metatags : list of str
sheared tags
Rs : float
metacal selection response (constant) which will be used instead of metadata / metatags
weights : np.array
stacking weights for each entry in the lens catalog
shear : bool
if True, the stacking columns are set to compute raw shear instead of DeltaSigma
"""
# indices to be used in the shear stacking
self.dst_nom = 4
self.dsx_nom = 5
self.dst_denom = 6
self.dsx_denom = 7
self.dsensum_s = 8
self.e1_nom = 10
self.e2_nom = 11
self.meta_denom = 3
self.meta_prefac = 2
self.snum_ind = 2
if shear:
self.dst_denom = 8
self.dsx_denom = 9
self.meta_denom = 3
self.meta_prefac = 3
self.snum_ind = 3
# input params saved
self.info = info
self.data = data
self.lcname = lcname
self.nbin = 1
if self.data is not None:
self.nbin = _get_nbin(data)
self.num = 1
if self.info is not None:
self.num = len(info)
# Data needed for metacalibration corrections
self.metadata = metadata
self.metatags = metatags
self.ismeta = self.metadata is not None and self.metatags is not None
self.Rs = Rs
if self.Rs is not None:
self.ismeta = False
self.labels = labels
self.ncen = ncen # number of centers
# containers for stacking parameters
self.weights = weights # stacking weights
# containers for the Jackknife sampling
self.sub_labels = None
self.subcounts = None
self.indexes = None
self.non_indexes = None
self.hasval = None
self.wdata = None
self.dsx_sub = None
self.dst_sub = None
self.snum_sub = None
# containers for the resulting profile
self.w = np.ones(self.num)
self.rr = np.ones(self.nbin) * BADVAL
self.dst0 = np.zeros(self.nbin)
self.dsx0 = np.zeros(self.nbin)
self.dst = np.zeros(self.nbin)
self.dsx = np.zeros(self.nbin)
self.dst_cov = np.zeros((self.nbin, self.nbin))
self.dst_err = np.zeros(self.nbin)
self.dsx_cov = np.zeros((self.nbin, self.nbin))
self.dsx_err = np.zeros(self.nbin)
self.resp_shear = np.zeros(self.nbin)
self.resp_sel = np.zeros(self.nbin)
self.resp_shear_err = np.zeros(self.nbin)
self.resp_sel_err = np.zeros(self.nbin)
# containers for the source number profile
self.snum = np.zeros(self.nbin)
self.snum0 = np.zeros(self.nbin)
self.snum_err = np.zeros(self.nbin)
self.snum_cov = np.zeros((self.nbin, self.nbin))
self.neff = 0 # number of entries with sources in any bin
self.hasprofile = False
self.hasdata = True
def copy(self):
infodict = self.to_dict()
obj = StackedProfileContainer(**infodict)
return obj
def to_dict(self):
"""
Extracts the raw data to a dictionary
The result contains the arguments passed to the constructor
"""
infodict = {
"info": self.info,
"data": self.data,
"labels": self.labels,
"ncen": self.ncen,
"lcname": self.lcname,
"metadata": self.metadata,
"metatags": self.metatags,
"Rs": self.Rs,
"weights": self.weights,
}
return infodict
@classmethod
def from_sub_dict(cls, sub_dict):
"""
Reconstructs the object from a dictionary
The resulting object does not contain the base data, only the profile, JK-subprofiles and
ancillary quantities
"""
spc = cls(None, None, None, sub_dict["ncen"], lcname=sub_dict["lcname"])
for key in sub_dict:
setattr(spc, key, sub_dict[key])
spc.hasdata = False
return spc
def to_sub_dict(self):
"""
Extracts the calculated profile and ancillary information to a dictionary
The resulting dict does not contain the base data, only the profile, the JK-subprofiles and
ancillary quantities
"""
infodict = {
"lcname": self.lcname,
"nbin": self.nbin,
"num": self.num,
"ismeta": self.ismeta,
"ncen": self.ncen,
"sub_labels": self.sub_labels,
"subcounts": self.subcounts,
"indexes": self.indexes,
"non_indexes": self.non_indexes,
"hasval": self.hasval,
"wdata": self.wdata,
"dsx_sub": self.dsx_sub,
"dst_sub": self.dst_sub,
"snum_sub": self.snum_sub,
"rr": self.rr,
"dst0": self.dst0,
"dsx0": self.dsx0,
"dst": self.dst,
"dsx": self.dsx,
"dst_cov": self.dst_cov,
"dst_err": self.dst_err,
"dsx_cov": self.dsx_cov,
"dsx_err": self.dsx_err,
"snum": self.snum,
"snum0": self.snum0,
"snum_err": self.snum_err,
"snum_cov": self.snum_cov,
"neff": self.neff,
"hasprofile": self.hasprofile,
}
return infodict
def drop_data(self):
"""
resets the :code:`info`, :code:`data` and :code:`labels` containers to :code:`None`
This reduces the size of the object in memory, but the lenses cannot be re-stacked afterwards.
Leaves the profile, JK-subprofiles and ancillary quantities unaffected
"""
self.info = None
self.data = None
self.labels = None
self.hasdata = False
def _reset_profile(self, keep_rr=True, reset_w=False):
"""Resets the profile container, while keeping the base data and JK-subprofiles"""
if reset_w:
self.w = np.ones(self.num)
if not keep_rr:
self.rr = np.ones(self.nbin) * BADVAL
self.dst0 = np.zeros(self.nbin)
self.dsx0 = np.zeros(self.nbin)
self.dst = np.zeros(self.nbin)
self.dsx = np.zeros(self.nbin)
self.dst_cov = np.zeros((self.nbin, self.nbin))
self.dst_err = np.zeros(self.nbin)
self.dsx_cov = np.zeros((self.nbin, self.nbin))
self.dsx_err = np.zeros(self.nbin)
self.resp_shear = np.zeros(self.nbin)
self.resp_sel = np.zeros(self.nbin)
self.resp_shear_err = np.zeros(self.nbin)
self.resp_sel_err = np.zeros(self.nbin)
self.hasprofile = False
def _setup_subpatches(self):
"""Quick setup for JK-subpatches"""
self.sub_labels = np.unique(self.labels).astype(int)
# indices of lenses entering JK-subsample i (all regions except region i)
self.indexes = [np.where(self.labels != ind)[0]
for ind in self.sub_labels]
# indices of lenses omitted from JK-subsample i (region i itself)
self.non_indexes = [np.where(self.labels == ind)[0]
for ind in self.sub_labels]
self.dsx_sub = np.zeros(shape=(self.nbin, self.ncen))
self.dst_sub = np.zeros(shape=(self.nbin, self.ncen))
self.snum_sub = np.zeros(shape=(self.nbin, self.ncen))
self.resp_sub_shear = np.zeros(shape=(self.nbin, self.ncen))
self.resp_sub_sel = np.zeros(shape=(self.nbin, self.ncen))
self.resp_sub_shear_err = np.zeros(shape=(self.nbin, self.ncen))
self.resp_sub_sel_err = np.zeros(shape=(self.nbin, self.ncen))
def _get_rr(self):
"""calculating radial values for data points"""
nzind = np.where(np.sum(self.data[0, :, :], axis=0) > 0)[0]
self.rr[nzind] = np.sum(self.data[1, :, nzind] * self.w, axis=1) / \
np.sum(self.data[2, :, nzind] * self.w, axis=1)
def _get_neff(self):
"""calculates effective number of entries (lenses)"""
return len(np.nonzero(self.info[:, 2])[0])
def _get_single_subcounts(self, data):
subcounts = np.array([np.sum(data[0, ind, :], axis=0)
for ind in self.indexes]).astype(int)
return subcounts
def _get_subcounts(self):
tmp_subcounts = []
# processing main data
subcount = self._get_single_subcounts(self.data)
tmp_subcounts.append(subcount)
if self.ismeta:
for i, tag in enumerate(self.metatags):
subcount = self._get_single_subcounts(self.metadata[i])
tmp_subcounts.append(subcount)
tmp_subcounts = np.dstack(tmp_subcounts)
subcounts = np.min(tmp_subcounts, axis=2)
return subcounts
def _subprofiles(self):
"""Calculates subprofiles for each patch"""
self.subcounts = self._get_subcounts()
hasval = [np.nonzero(arr.astype(bool))[0] for arr in self.subcounts]
# calculating jackknife subprofiles
for i, lab in enumerate(self.sub_labels):
ind = self.indexes[i]
cind = hasval[i]
ww = self.w[ind, np.newaxis]
ww_sum = np.sum(self.w[ind])
Rs = np.zeros(len(cind))
if self.Rs is not None:
Rs = np.ones(len(cind)) * self.Rs
if self.ismeta:
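# Selection response from the sheared measurements. The metadata entries are
# assumed to follow the (1p, 1m, 2p, 2m) order of sheared_tags, with a
# two-sided shear step of 0.01, hence the divisor 2 * 0.01 = 0.02 below.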
val1parr = (np.sum(self.metadata[0][self.e1_nom, ind][:, cind] * ww, axis=0) /
np.sum(self.metadata[0][self.meta_denom, ind][:, cind] * ww, axis=0))
val1marr = (np.sum(self.metadata[1][self.e1_nom, ind][:, cind] * ww, axis=0) /
np.sum(self.metadata[1][self.meta_denom, ind][:, cind] * ww, axis=0))
R11 = (val1parr - val1marr) / 0.02
val2parr = (np.sum(self.metadata[2][self.e2_nom, ind][:, cind] * ww, axis=0) /
np.sum(self.metadata[2][self.meta_denom, ind][:, cind] * ww, axis=0))
val2marr = (np.sum(self.metadata[3][self.e2_nom, ind][:, cind] * ww, axis=0) /
np.sum(self.metadata[3][self.meta_denom, ind][:, cind] * ww, axis=0))
R22 = (val2parr - val2marr) / 0.02
Rs = 0.5 * (R11 + R22)
wsum = np.sum(self.data[self.meta_prefac, ind][:, cind] * ww, axis=0)
ssum = np.sum(self.data[self.meta_denom, ind][:, cind] * ww, axis=0)
dsum_jack = np.sum(self.data[self.dst_nom, ind][:, cind] * ww, axis=0)
dsum_w_jack = np.sum(self.data[self.dst_denom, ind][:, cind] * ww, axis=0)
self.dst_sub[cind, lab] = dsum_jack / (dsum_w_jack + Rs * wsum)
osum_jack = np.sum(self.data[self.dsx_nom, ind][:, cind] * ww, axis=0)
osum_w_jack = np.sum(self.data[self.dsx_denom, ind][:, cind] * ww, axis=0)
self.dsx_sub[cind, lab] = osum_jack / (osum_w_jack + Rs * wsum)
dsensum = np.sum(self.data[self.dsensum_s, ind][:, cind] * ww, axis=0)
self.resp_sub_shear[cind, lab] = dsensum / ssum
self.resp_sub_sel[cind, lab] = Rs
self.snum_sub[cind, lab] = np.sum(self.data[self.snum_ind, ind][:, cind] * ww, axis=0) / ww_sum
self.snum_sub[:, lab] /= np.sum(self.snum_sub[:, lab])
def _profcalc(self):
"""JK estimate on the mean profile"""
tmp_rr = self.rr
self.rr = np.ones(len(self.rr)) * BADVAL
for r in range(self.nbin):
# only use JK patches with more than one pair count in this radial bin (to avoid NaNs)
subind = self.sub_labels[np.where(self.subcounts[:, r] > 1)[0]]
njk = len(subind)
if njk > 1:
self.dst[r] = np.sum(self.dst_sub[r, subind]) / njk
self.dsx[r] = np.sum(self.dsx_sub[r, subind]) / njk
self.snum[r] = np.sum(self.snum_sub[r, subind]) / njk
self.rr[r] = tmp_rr[r]
self.resp_sel[r] = np.sum(self.resp_sub_sel[r, subind]) / njk
self.resp_shear[r] = np.sum(self.resp_sub_shear[r, subind]) / njk
# This is kind of a placeholder errorbar without cov bookkeeping
self.resp_sel_err[r] = np.sqrt(np.var(self.resp_sub_sel[r, subind]) * (njk - 1.) / njk)
self.resp_shear_err[r] = np.sqrt(np.var(self.resp_sub_shear[r, subind]) * (njk - 1.) / njk)
def _covcalc(self):
"""JK estimate on the covariance matrix"""
# calculating the covariance
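# JK covariance estimate:
#   C[r1, r2] = (njk - 1) / njk * sum_k (p_k[r1] - mean[r1]) * (p_k[r2] - mean[r2])
# where the sum runs over JK patches with more than one pair count in both radial bins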
for r1 in range(self.nbin):
for r2 in range(self.nbin):
# getting patches where there are multiple elements in both indices
subind1 = self.sub_labels[np.where(
self.subcounts[:, r1] > 1)[0]]
subind2 = self.sub_labels[np.where(
self.subcounts[:, r2] > 1)[0]]
# overlapping indices
subind = list(set(subind1).intersection(set(subind2)))
njk = len(subind)
if njk > 1:
self.dst_cov[r1, r2] = np.sum((self.dst_sub[r1, subind] -
self.dst[r1]) *
(self.dst_sub[r2, subind] -
self.dst[r2])) * \
(njk - 1.0) / njk
self.dsx_cov[r1, r2] = np.sum((self.dsx_sub[r1, subind] -
self.dsx[r1]) *
(self.dsx_sub[r2, subind] -
self.dsx[r2])) * \
(njk - 1.0) / njk
self.snum_cov[r1, r2] = np.sum((self.snum_sub[r1, subind] -
self.snum[r1]) *
(self.snum_sub[r2, subind] -
self.snum[r2])) * \
(njk - 1.0) / njk
self.dst_err = np.sqrt(np.diag(self.dst_cov))
self.dsx_err = np.sqrt(np.diag(self.dsx_cov))
self.snum_err = np.sqrt(np.diag(self.snum_cov))
def prof_maker(self, weights=None):
"""
Calculates the Jackknife estimate of the stacked profile and of the corresponding covariance
Parameters
----------
weights : np.array
weight for each entry in the lens catalog
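Examples
--------
A minimal sketch with uniform weights::

    prof.prof_maker(weights=np.ones(prof.num))
    print(prof.rr, prof.dst, prof.dst_err)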
"""
# adding weights
if weights is None:
weights = np.ones(self.num)
self.w = weights
# preparing the JK patches
self._setup_subpatches()
# getting radius values
self._get_rr()
# calculating the profiles
self._subprofiles()
self._profcalc()
self._covcalc()
self.neff = self._get_neff()
self.hasprofile = True
def _composite_subprofiles(self, other, operation="-"):
"""Applies the binary operation to the profile objects"""
tmp_dst_sub = np.zeros(shape=(self.nbin, self.ncen))
tmp_dsx_sub = np.zeros(shape=(self.nbin, self.ncen))
tmp_snum_sub = np.zeros(shape=(self.nbin, self.ncen))
tmp_sub_labels = np.array(
list(set(self.sub_labels).intersection(set(other.sub_labels))))
tmp_subcounts = np.zeros((len(tmp_sub_labels), self.nbin))
for i in range(len(tmp_sub_labels)):
ind = tmp_sub_labels[i]
ind1 = np.where(self.sub_labels == ind)[0][0]
ind2 = np.where(other.sub_labels == ind)[0][0]
for j in range(self.nbin):
tmp_subcounts[i, j] = np.min(
(self.subcounts[ind1, j], other.subcounts[ind2, j]))
for r in range(self.nbin):
subind = tmp_sub_labels[np.where(tmp_subcounts[:, r] > 0)[0]]
if np.max(tmp_subcounts[:, r]) == 1:
self.rr[r] = BADVAL
njk = len(subind)
if njk > 1:
if operation == "-":
tmp_dst_sub[r, subind] = self.dst_sub[r, subind] - \
other.dst_sub[r, subind]
tmp_dsx_sub[r, subind] = self.dsx_sub[r, subind] - \
other.dsx_sub[r, subind]
tmp_snum_sub[r, subind] = self.snum_sub[r, subind] - \
other.snum_sub[r, subind]
elif operation == "+":
tmp_dst_sub[r, subind] = self.dst_sub[r, subind] + \
other.dst_sub[r, subind]
tmp_dsx_sub[r, subind] = self.dsx_sub[r, subind] + \
other.dsx_sub[r, subind]
tmp_snum_sub[r, subind] = self.snum_sub[r, subind] + \
other.snum_sub[r, subind]
elif operation == "*":
tmp_dst_sub[r, subind] = self.dst_sub[r, subind] * \
other.dst_sub[r, subind]
tmp_dsx_sub[r, subind] = self.dsx_sub[r, subind] * \
other.dsx_sub[r, subind]
tmp_snum_sub[r, subind] = self.snum_sub[r, subind] * \
other.snum_sub[r, subind]
elif operation == "/":
tmp_dst_sub[r, subind] = self.dst_sub[r, subind] / \
other.dst_sub[r, subind]
tmp_dsx_sub[r, subind] = self.dsx_sub[r, subind] / \
other.dsx_sub[r, subind]
tmp_snum_sub[r, subind] = self.snum_sub[r, subind] / \
other.snum_sub[r, subind]
else:
raise ValueError("Operation not supported, use ('+', '-', '*', '/')")
# assigning updated containers
self.sub_labels = tmp_sub_labels
self.subcounts = tmp_subcounts
self.dst_sub = tmp_dst_sub
self.dsx_sub = tmp_dsx_sub
self.snum_sub = tmp_snum_sub
def multiply(self, val, keep_rr=True):
"""
Calculates the JK estimate of the profile multiplied by the specified :code:`val`.
The operation is performed in-place.
Use deepcopy to obtain copies of the object for storing the previous state.
Parameters
----------
val : float or np.array
value to multiply by (arrays are broadcast against the JK-subprofiles)
keep_rr : bool
if False, the radial values are reset and recomputed
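Examples
--------
A minimal sketch (the operation is performed in-place, hence the deepcopy)::

    scaled = copy.deepcopy(prof)
    scaled.multiply(2.0)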
"""
# clears the profile container
self._reset_profile(keep_rr=keep_rr)
if not keep_rr:
# getting radius values
self._get_rr()
# performs the multiplication
self.dst_sub *= val
self.dsx_sub *= val
# re calculates profiles
self._profcalc()
self._covcalc()
self.hasprofile = True
def composite(self, other, operation="-", keep_rr=True):
"""
Calculates the JK estimate of the chosen operation applied to the two profiles
**Possible Operations:**
* "-": self - other
* "+": self + other
* "*": self * other
* "/": self / other
The result is updated in-place. Use deepcopy to obtain
copies of the object for storing the previous state.
Parameters
----------
other : StackedProfileContainer
the other profile entering the operation
operation : str
string specifying the operation ("+", "-", "*" or "/")
keep_rr : bool
if False, the radial values are reset and recomputed
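Examples
--------
A minimal sketch for a profile difference (in-place, hence the deepcopy)::

    prof3 = copy.deepcopy(prof2)
    prof3.composite(prof1, operation="-")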
"""
# making sure that there is a profile in both containers
assert self.hasprofile and other.hasprofile
# making sure that the two profiles use the same centers
# err_msg = 'JK centers do not agree within 1e-5'
# np.testing.assert_allclose(self.centers, other.centers,
# rtol=1e-5, err_msg=err_msg)
assert self.dst_sub.shape == other.dst_sub.shape
# clears the profile container
self._reset_profile(keep_rr=keep_rr)
if not keep_rr:
# getting radius values
self._get_rr()
# updates subprofiles
self._composite_subprofiles(other=other, operation=operation)
self._profcalc()
self._covcalc()
self.hasprofile = True
def append(self, other, keep_rr=True):
"""Appends the base data of another container and re-stacks the joint profile in-place"""
# making sure that there is a profile in both containers
assert self.hasprofile and other.hasprofile
# making sure that the two profiles use the same centers
# err_msg = 'JK centers do not agree within 1e-5'
# np.testing.assert_allclose(self.centers, other.centers,
# rtol=1e-5, err_msg=err_msg)
assert self.dst_sub.shape == other.dst_sub.shape
# clears the profile container
self._reset_profile(keep_rr=keep_rr)
if not keep_rr:
# getting radius values
self._get_rr()
# concat info, data, labels, metadata
self.info = np.vstack((self.info, other.info))
self.data = np.hstack((self.data, other.data))
if self.labels is not None:
self.labels = np.concatenate((self.labels, other.labels))
if self.metadata is not None:
_metadata = []
for i in np.arange(4):
_metadata.append(np.hstack((self.metadata[i], other.metadata[i])))
self.metadata = _metadata
# calculating the profiles
self._setup_subpatches()
self._subprofiles()
self._profcalc()
self._covcalc()
self.hasprofile = True
def stacked_pcov(plist):
"""
Calculates the covariance between a list of profiles
Only elements where both profiles have sufficient JK patches with sources are considered;
the rest is filled with placeholder values (-9999.0 by default).
Parameters
----------
plist: list of StackedProfileContainer objects
profiles across which the covariances are to be calculated
Returns
-------
np.array, np.array
cov (E-mode), cov (B-mode)
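Examples
--------
A minimal sketch; :code:`prof1` and :code:`prof2` are assumed to be
StackedProfileContainer objects with already calculated profiles::

    cov_t, cov_x = stacked_pcov([prof1, prof2])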
"""
# data vectors for covariance
dtvec = np.concatenate([pc.dst for pc in plist])
dxvec = np.concatenate([pc.dsx for pc in plist])
# lengths of bins
dlen = len(dtvec)
rlen = plist[0].nbin
# container for the results
supercov_t = np.ones(shape=(dlen, dlen)) * BADVAL
supercov_x = np.ones(shape=(dlen, dlen)) * BADVAL
# building up the covariance matrix
for i1 in range(dlen):
p1 = i1 // rlen # the p-th profile
pc1 = plist[p1]
r1 = i1 % rlen # the r-th radial bin within the p-th profile
for i2 in range(dlen):
p2 = i2 // rlen
pc2 = plist[p2]
r2 = i2 % rlen
# calculating subpatches with data
subind1 = pc1.sub_labels[np.where(pc1.subcounts[:, r1] > 1)[0]]
subind2 = pc2.sub_labels[np.where(pc2.subcounts[:, r2] > 1)[0]]
subind = list(set(subind1).intersection(set(subind2)))
njk = len(subind) # number of subpatches used
if njk > 1:
part1_t = (pc1.dst_sub[r1, subind] - pc1.dst[r1])
part2_t = (pc2.dst_sub[r2, subind] - pc2.dst[r2])
part1_x = (pc1.dsx_sub[r1, subind] - pc1.dsx[r1])
part2_x = (pc2.dsx_sub[r2, subind] - pc2.dsx[r2])
supercov_t[i1, i2] = np.sum(part1_t * part2_t) * (njk - 1.) / njk
supercov_x[i1, i2] = np.sum(part1_x * part2_x) * (njk - 1.) / njk
return supercov_t, supercov_x
def process_profile(fnames, ismeta=True, labels=None, weights=None, weight_key="ww", id_key="id", shear=False, Rs=None):
"""
Extracts StackedProfileContainer from xshear output
Parameters
----------
fnames : list of str
filenames to be read (without metatags)
ismeta : bool
whether to include METACALIBRATION files with :code:`sheared_tags`
labels : list
JK-labels for each entry in the lens files (in the order they are written to disk)
weights: list
weights to be applied to the lenses in calculating the average signal
weight_key: str
column name of the weights in the weight DataFrame
id_key: str
column name of the lens IDs in the weight DataFrame, used to match to cluster IDs
shear: bool
if True, stack raw shear instead of DeltaSigma
Rs: float
constant metacal selection response passed to the profile container
Returns
-------
StackedProfileContainer
Weak Lensing profile object
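Examples
--------
A minimal sketch (the file names are hypothetical placeholders)::

    fnames = ["lens_patch0_result.dat", "lens_patch1_result.dat"]
    prof = process_profile(fnames, ismeta=False)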
"""
if type(fnames) is str or len(fnames) == 0:
if type(fnames) is not str:
fnames = fnames[0]
cinfo, cdata, sheared_data = read_single_bin(fnames, ismeta)
else:
cinfo, cdata, sheared_data, labels = read_multiple_bin(fnames, ismeta)
ncens = len(fnames)
prof = StackedProfileContainer(cinfo, cdata, labels, ncens,
metadata=sheared_data,
metatags=sheared_tags, Rs=Rs)
if shear:
prof.dst_denom = 8
prof.dsx_denom = 9
prof.meta_denom = 3
prof.meta_prefac = 3
prof.snum_ind = 3
if weights is not None:
weights = extract_weights(cinfo, weights, weight_key=weight_key, id_key=id_key)
print(weights.sum())
prof.prof_maker(weights=weights)
return prof
def load_weights(ibin):
"""Loads lens IDs and weights for lens bin :code:`ibin` into a DataFrame"""
lenstab = fio.read(fullpaths[params["cat_to_use"]]["lens"].flatten()[ibin])
weights = pd.DataFrame()
weights["id"] = match_endian(lenstab[params["lenskey"]["id"]])
weights["ww"] = match_endian(lenstab[params["lensweight"]])
return weights
def extract_weights(info, weights, weight_key="ww", id_key="id"):
"""Matches the weight table to the lens IDs in the info table; unmatched lenses get weight 0"""
itab = pd.DataFrame()
itab["index_0"] = match_endian(info[:, 0])
tab = pd.merge(itab, weights, how="left", left_on="index_0", right_on=id_key).fillna(0)
return tab[weight_key].values
def olivers_mock_function(a, b, c):
"""
This is a one line description
This is a multi line description of what this is::
\\sum_a = b \\cdot c
Parameters
----------
a : int
index
b : float
exponent of the expression
c : np.ndarray
dataset
Returns
-------
dict
The output
"""
pass
class AutoCalibrateProfile(object):
"""WEIGHTS must be from the base input dataset for Random points!!!"""
def __init__(self, fname, fname_jk, pzcat, weights=None, id_key="MEM_MATCH_ID", weight_key="WEIGHT",
z_key="Z_LAMBDA", sbins=(2, 3), xlims=(0.2, 30), Rs_sbins=None, seed=None,
mfactor_sbins=None, mfactor_stds=None):
"""
Automatically reads and calibrates weak lensing profiles according to DES Y3 standards
Parameters
----------
fname: str
file name
fname_jk: list of lists
file names to Jackknife patches
pzcat: sompz_reader object
SOMPZ dataset
weights: DataFrame
DataFrame of IDs and weights
id_key: str
column key for IDs
weight_key: str
column key for weights
z_key: str
column key for redshifts
sbins: tuple
source bins to use
xlims: tuple
radius min and max in the units of xshear
Rs_sbins: list of lists
selection responses for the source bins
seed: int
np.random seed to use when needed
mfactor_sbins: list
multiplicative correction to apply for each source bin
mfactor_stds: list
std of the multiplicative correction to apply for each source bin
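A minimal usage sketch (assuming :code:`pzcat` is an already constructed
sompz_reader object and the file names point to existing xshear output)::

    acp = AutoCalibrateProfile(fname, fname_jk, pzcat, sbins=(2, 3))
    acp.get_profiles()
    print(acp.rr, acp.dst, acp.dst_err)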
"""
self.fname = fname
self.fname_jk = fname_jk
self.pzcat = pzcat
self.infos = create_infodict(fname_jk)
self.weights = weights
self.weight_key = weight_key
self.id_key = id_key
self.z_key = z_key
self.sbins = sbins
self.xlims = xlims
self._profiles = []
self.profile = None
self.target = None
self.scinvs = []
self.Rs_sbins = Rs_sbins
self.mfactor_sbins = mfactor_sbins
self.mfactor_stds = mfactor_stds
self.rng = np.random.RandomState()
if seed is not None:
self.rng.seed(seed)
def load_profiles(self, ismeta=True, shear=True, **kwargs):
"""
loads the lensing profiles from file (the xshear output)
Parameters
----------
ismeta: bool
try to load sheared metacal files from disk
shear: bool
if True the result is a shear profile, otherwise DeltaSigma
"""
self._profiles = []
for i, sbin in enumerate(self.sbins):
print("loading source bin", sbin)
clust_files = [info["outfile"].replace("_result.dat", "_bin" + str(sbin + 1) + "_result.dat")
for info in self.infos]
Rs = None
if self.Rs_sbins is not None:
Rs = self.Rs_sbins[i]
clust = process_profile(clust_files, ismeta=ismeta, Rs=Rs,
weights=self.weights, weight_key=self.weight_key, id_key=self.id_key,
shear=shear)
self._profiles.append(clust)
def load_targets(self, **kwargs):
"""loads the lens catalog from file"""
iname = self.infos[0]["infile"].split("_patch")[0] + ".fits"
_target = fio.read(iname)
target = to_pandas(_target)
tmp = pd.DataFrame()
tmp[self.id_key] = self._profiles[0].info[:, 0]
self.target = pd.merge(tmp, target, how="left", on=self.id_key).reset_index(drop=True)
self.target[self.weight_key] = 1.
ww = np.ones(len(self.target))
if self.weights is not None:
ww = self.weights
self.target[self.weight_key] = ww
def get_scinvs(self, **kwargs):
"""Calculates the expected Sigma Crit Inverse from the redshift integral over Pz_src assuming the mean z_lens"""
zmean = np.average(self.target[self.z_key], weights=self.weights[self.weight_key])
self.scinvs = []
for sbin in self.sbins:
self.scinvs.append(self.pzcat.get_single_scinv(zmean, sbin=sbin))
self.scinvs = np.array(self.scinvs)
def get_scinvs_bin(self, **kwargs):
"""Calculates the expected Sigma Crit Inverse from the double redshift integral over Pz_lens and Pz_src"""
self.scinvs = []
for sbin in self.sbins:
self.scinvs.append(self.pzcat.get_bin_scinv(self.target[self.z_key], sbin=sbin, weights=self.target[self.weight_key].values))
self.scinvs = np.array(self.scinvs)
def combine_sbins(self, mfactor_sbins=None, mfactor_stds=None, weight_scrit_exponent=1):
"""
Combine source bins
Parameters
----------
mfactor_sbins: list
multiplicative correction to apply for each source bin
mfactor_stds: list
std of the multiplicative correction to apply for each source bin
weight_scrit_exponent: int
exponent of the Sigma Crit Inverse weight used when combining the source bins
"""
_profiles = []
for i, scinv in enumerate(self.scinvs):
_profiles.append(copy.deepcopy(self._profiles[i]))
if self.weights is not None:
_weights = extract_weights(self._profiles[i].info, self.weights, weight_key=self.weight_key, id_key=self.id_key)
_profiles[i].prof_maker(weights=_weights)
else:
_profiles[i].prof_maker()
_profiles[i].multiply(scinv**weight_scrit_exponent)
self.profile = _profiles[0]
if mfactor_sbins is None:
mfactor_sbins = np.ones(len(self.scinvs))
if mfactor_stds is None:
mfactor_stds = np.ones(shape=self.profile.dst_sub.shape)
self.profile.multiply(mfactor_sbins[0])
self.profile.multiply(mfactor_stds[0])
for i in np.arange(len(_profiles) - 1):
tmp = _profiles[i + 1]
tmp.multiply(mfactor_sbins[i + 1])
tmp.multiply(mfactor_stds[i + 1])
self.profile.composite(tmp, operation="+")
factor = 1. / np.sum(self.scinvs**(weight_scrit_exponent + 1))
self.profile.multiply(factor)
def get_profiles(self, reload=True, scinvs=None, mfactor_sbins=None, mfactor_stds=None, Rs_sbins=None,
weights=None, weight_key=None, id_key=None, z_key=None, **kwargs):
"""
Loads and Calculates DeltaSigma profile from a combination of tomographic source bins
Parameters
----------
reload: bool
if True, the files are loaded from disk; if False, the already loaded data is re-processed
scinvs: list
list of Sigma Crit inverse values
mfactor_sbins: list
mean multiplicative shear bias and redshift bias for each tomographic bin
mfactor_stds: list
std of the multiplicative correction to apply for each source bin
Rs_sbins: list of lists
selection responses for the source bins
weights: DataFrame
DataFrame of IDs and weights
id_key: str
column key for IDs
weight_key: str
column key for weights
z_key: str
column key for redshifts
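Examples
--------
A minimal sketch; the multiplicative factors here are hypothetical placeholders::

    acp.get_profiles(reload=True, mfactor_sbins=[1.02, 1.01])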
"""
if Rs_sbins is not None:
self.Rs_sbins = Rs_sbins
if mfactor_stds is not None:
self.mfactor_stds = mfactor_stds
if weights is not None:
self.weights = weights
if id_key is not None:
self.id_key = id_key
if weight_key is not None:
self.weight_key = weight_key
if z_key is not None:
self.z_key = z_key
if reload:
self.load_profiles(Rs_sbins=Rs_sbins, **kwargs)
self.scinvs = scinvs
if self.scinvs is None:
if reload:
self.load_targets(**kwargs)
self.get_scinvs_bin(**kwargs)
self.combine_sbins(mfactor_sbins=mfactor_sbins, mfactor_stds=mfactor_stds)
self.scale_cut()
def composite(self, other, operation):
"""
Add, Subtract, Multiply or Divide this object by another object of the same class
Parameters
----------
other: AutoCalibrateProfile
Object to composite with
operation: str
"+, -, *, /"
Notes
-----
The result is stored in-place; nothing is returned.
"""
_profiles = copy.copy(self._profiles)
res = []
for i, prof in enumerate(_profiles):
prof.composite(other._profiles[i], operation)
res.append(prof)
self._profiles = res
self.combine_sbins()
self.scale_cut()
def scale_cut(self):
"""Applies radial scale cuts; stores the cut 'rr', 'dst', 'dst_err' and 'cov' on the object"""
ii = (self.profile.rr > self.xlims[0]) & (self.profile.rr < self.xlims[1])
self.rr = self.profile.rr[ii]
self.dst = self.profile.dst[ii]
self.dst_err = self.profile.dst_err[ii]
self.cov = self.profile.dst_cov[ii, :][:, ii]
def add_boost_jk(self, sboost, mfactor_sbins=None):
"""
Add boost factors by correcting each Jackknife patch
Parameters
----------
sboost: SOMBoost
Fitted boost factors
mfactor_sbins: list
multiplicative correction to apply for each source bin
"""
_profiles = []
for i, scinv in enumerate(self.scinvs):
_profiles.append(copy.deepcopy(self._profiles[i]))
if self.weights is not None:
_weights = extract_weights(self._profiles[i].info, self.weights, weight_key=self.weight_key, id_key=self.id_key)
_profiles[i].prof_maker(weights=_weights)
else:
_profiles[i].prof_maker()
fcl = sboost.resarr_jk[0][i][:, 2:].T
_profiles[i].multiply(scinv * (1 / (1 - fcl)))
if mfactor_sbins is None:
mfactor_sbins = np.ones(len(self.scinvs))
self.profile = _profiles[0]
self.profile.multiply(mfactor_sbins[0])
for i in np.arange(len(_profiles) - 1):
tmp = _profiles[i + 1]
tmp.multiply(mfactor_sbins[i + 1])
self.profile.composite(tmp, operation="+")
factor = 1. / np.sum(self.scinvs**(2))
self.profile.multiply(factor)
print(self.profile.dst)
self.scale_cut()
def add_boost(self, sboost):
"""
Applies the boost correction to the profile; the boost uncertainty is added to the diagonal of the covariance
Parameters
----------
sboost: SOMBoost
Fitted boost factors
"""
try:
cov = sboost.covs[0]
except Exception:
cov = np.zeros(shape=(len(self.dst), len(self.dst)))
self._fcls = []
amp = sboost.boost_amps[0]
tmps = []
self.fcl_err = []
for i, _amp in enumerate(amp):
# arr = np.concatenate((_amp, (0,)))
self._fcls.append(_amp)
tmps.append(_amp * self.scinvs[i])
self.fcl_err.append(np.diag(cov[i]).mean() * self.scinvs[i])
factor = 1. / self.scinvs.sum()
self.fcl = np.sum(tmps, axis=0) * factor
self.fcl_err = np.sum(self.fcl_err) * factor
ln = len(self.dst) - len(self.fcl)
if ln > 0:
pad = np.zeros(self.profile.dst_sub.shape[0] - len(self.fcl))
self.fcl = np.concatenate((self.fcl, pad))
self.profile.multiply((1 / (1 - self.fcl))[:, np.newaxis])
self.scale_cut()
self.cov = self.cov + np.diag(self.cov) * self.fcl_err
def to_profile(self):
"""
Extracts a light-weight version of the data container
The only attributes are 'rr', 'dst', 'dst_err' and 'cov'
"""
prof = ProfileContainer(self.rr, self.dst, self.dst_err, self.cov)
return prof
def copy(self):
"""Returns a deep copy of the object"""
res = AutoCalibrateProfile(self.fname, self.fname_jk, self.pzcat, self.weights,
id_key=self.id_key, weight_key=self.weight_key,
sbins=self.sbins, xlims=self.xlims, Rs_sbins=self.Rs_sbins)
res._profiles = []
for prof in self._profiles:
res._profiles.append(copy.deepcopy(prof))
res.target = self.target.copy()
res.scinvs = self.scinvs.copy()
res.profile = copy.deepcopy(self.profile)
return res
class ProfileContainer(object):
"""Light-weight profile container with attributes 'rr', 'dst', 'dst_err' and 'cov'"""
def __init__(self, rr, dst, dst_err, cov):
self.rr = rr
self.dst = dst
self.dst_err = dst_err
self.cov = cov