# Source code for xpipe.tools.catalogs
"""
Handles FITS -> pandas transformations for tables with multidimensional "columns"
"""
import numpy as np
import pandas as pd
import sys
def to_pandas(recarr):
    """
    Converts potentially nested record array (such as a FITS Table) into Pandas DataFrame

    FITS tables sometimes have multidimensional columns, which are not supported for DataFrames
    Pandas DataFrames however provide many nice features, such as SQL speed database matchings.

    The approach is to flatten out multidimensional column [[COL]] into
    [COL_0, COL_1, ..., COL_(N-1)] (the suffix is the zero-based index along
    the first axis of the column's shape).

    The flattened array is converted to the native byte order of the machine
    before it is handed to pandas, whatever byte order the input was stored in.

    Examples
    --------

    Just pass the loaded FITS table::

        import fitsio as fio
        import xpipe.io.catalogs as catalogs

        raw_data = fio.read("data.fits")
        data = catalogs.to_pandas(raw_data)

    Parameters
    ----------
    recarr : numpy.array
        array to be converted to DataFrame

    Returns
    -------
    pandas.DataFrame
        array as DataFrame
    """
    newarr = flat_copy(recarr)
    # Cast every field to native byte order instead of blindly swapping:
    # an unconditional byteswap turns already-native input into non-native
    # data, and ndarray.newbyteorder() no longer exists in NumPy >= 2.0.
    # copy=False avoids a second copy when the dtype is already native.
    native = newarr.astype(newarr.dtype.newbyteorder('='), copy=False)
    res = pd.DataFrame.from_records(native, columns=native.dtype.names)
    return res
[docs]def flat_type(recarr):
"""
Assigns the dtypes to the flattened array
Parameters
----------
recarr : numpy.array
array to be converted to DataFrame
Returns
-------
list
dtypes of flattened array
"""
newtype = []
for dt in recarr.dtype.descr:
if len(dt) == 3:
for i in np.arange(dt[2][0]):
newtype.append((dt[0] + '_' + str(i), dt[1]))
else:
newtype.append(dt)
return newtype
def flat_copy(recarr):
    """
    Copies the record array into a new recarray which has only 1-D columns

    The layout of the output is taken from ``flat_type(recarr)``. Columns
    whose dtype description carries a shape are copied slice by slice along
    their second array axis (``column[:, k]``); all other columns are copied
    as they are.

    Parameters
    ----------
    recarr : numpy.array
        array to be converted to DataFrame

    Returns
    -------
    numpy.array
        array with 1-D columns
    """
    flat_dtype = flat_type(recarr)
    flat = np.zeros(len(recarr), dtype=flat_dtype)
    source_names = recarr.dtype.names

    # index into flat_dtype of the next output column to be filled
    target = 0
    for pos, entry in enumerate(recarr.dtype.descr):
        column = recarr[source_names[pos]]
        if len(entry) == 3:
            # multidimensional column: one output column per first-axis index
            for k in range(entry[2][0]):
                flat[flat_dtype[target][0]] = column[:, k]
                target += 1
        else:
            flat[flat_dtype[target][0]] = column
            target += 1
    return flat
def match_endian(arr):
    """
    Tries to auto convert the Endian ordering of an array to match to native ordering

    Parameters
    ----------
    arr : numpy.array
        1-D array to be checked

    Returns
    -------
    numpy.array
        a byte-swapped copy with the opposite (i.e. native) byte order if
        ``arr.dtype.byteorder`` is an explicit ``"<"`` or ``">"`` that differs
        from the native one, otherwise ``arr`` itself, unchanged

    Raises
    ------
    TypeError
        if ``arr`` is not one dimensional
    """
    valid_endians = ["<", ">"]
    endians = {
        "little": "<",
        "big": ">"
    }
    native_byteorder = endians[sys.byteorder]
    array_byteorder = arr.dtype.byteorder

    if len(arr.shape) != 1:
        raise TypeError("only 1D arrays are applicable for this method")

    # dtype.byteorder is "=" (native) or "|" (not applicable) for everything
    # that needs no conversion, so only explicit "<" / ">" can mismatch.
    if (array_byteorder in valid_endians) and native_byteorder != array_byteorder:
        # Swap the bytes, then relabel the dtype with the opposite order so
        # the values stay the same. ndarray.newbyteorder() was removed in
        # NumPy 2.0; dtype.newbyteorder() + view() works on old and new NumPy.
        result = arr.byteswap().view(arr.dtype.newbyteorder())
    else:
        result = arr
    return result