Source code for fca.context
import numpy as np
from numpy import array_equiv
class Context:
    """Implements a formal context.

    A formal context can be understood as a triplet :math:`<A, B, I>` where
    :math:`A` are objects, :math:`B` are attributes and :math:`I` is a
    relation on :math:`A` x :math:`B`. If :math:`<x1, y2>` is in :math:`I`,
    object x1 has attribute y2.

    Parameters
    ----------
    data_frame : :py:class:`pandas.DataFrame`
        Context uses a :py:class:`pandas.DataFrame` as the inner
        representation of the relation :math:`I`. Its index and columns
        (:py:class:`pandas.Index`) represent :math:`A` and :math:`B`. The
        data should contain only True/False values, or it can be scaled with
        :py:func:`pandas.get_dummies` to multiple columns with 0/1 values.

    Attributes
    ----------
    data_frame : :py:class:`pandas.DataFrame`
        Complete dataset related to the formal context.
    data : :py:class:`numpy.ndarray`
        The ``values`` array of ``data_frame``; used by :meth:`up` and
        :meth:`down`.
    objects : :py:class:`pandas.Index`
        All objects from the context (the frame's index).
    attributes : :py:class:`pandas.Index`
        All attributes from the context (the frame's columns).

    Raises
    ------
    ValueError
        If the index (object names) or the columns (attribute names) of the
        supplied frame contain duplicates.
    """

    # Contexts are mutable (see ``clarificate``), so they stay unhashable.
    # Defining ``__eq__`` already implies this; it is spelled out on purpose.
    __hash__ = None

    def __init__(self, data_frame):
        self._df = None
        # Goes through the property setter, which validates the frame and
        # populates ``data``, ``objects`` and ``attributes``.
        self.data_frame = data_frame

    def __str__(self):
        return str(self.data_frame)

    def __repr__(self):
        return repr(self.data_frame)

    def __eq__(self, other):
        """Two contexts are equal when their data frames are equal."""
        if not isinstance(other, Context):
            # Let Python try the reflected comparison; ``==`` then falls back
            # to identity and evaluates to False for unrelated types.
            return NotImplemented
        return self.data_frame.equals(other.data_frame)

    def _duplicate_columns(self, frame):
        """Return labels of columns that are repeated by a later column.

        Based on a snippet from user kalu on
        https://stackoverflow.com/a/32961145

        Columns are compared only against columns of the same dtype. A column
        is reported when some column to its right holds equal values, so the
        last column of every group of equal columns is never reported.
        """
        same_dtype_groups = (
            frame.columns.to_series().groupby(frame.dtypes).groups
        )
        duplicates = []
        for labels in same_dtype_groups.values():
            subframe = frame[labels]
            names = subframe.columns
            count = len(names)
            for left in range(count):
                left_values = subframe.iloc[:, left].values
                for right in range(left + 1, count):
                    if array_equiv(left_values,
                                   subframe.iloc[:, right].values):
                        duplicates.append(names[left])
                        # One match is enough to mark this column.
                        break
        return duplicates

    @property
    def data_frame(self):
        return self._df

    @data_frame.setter
    def data_frame(self, value):
        if value.index.duplicated().any():
            raise ValueError("Duplicity in name of the objects")
        if value.columns.duplicated().any():
            raise ValueError("Duplicity in name of the attributes")
        self._df = value
        # Derived views are refreshed on every assignment so they never go
        # stale relative to the frame.
        self.data = self._df.values
        self.objects = self._df.index
        self.attributes = self._df.columns

    def number_of_objects(self):
        """Return number of objects."""
        return len(self.objects)

    def number_of_attributes(self):
        """Return number of attributes."""
        return len(self.attributes)

    def all_objects(self):
        """Return bool numpy array filled with True values. Length of this
        array is equal to the number of objects."""
        return np.full(self.number_of_objects(), True)

    def all_attributes(self):
        """Return bool numpy array filled with True values. Length of this
        array is equal to the number of attributes."""
        return np.full(self.number_of_attributes(), True)

    def empty_objects(self):
        """Return bool numpy array filled with False values. Length of this
        array is equal to the number of objects."""
        return np.full(self.number_of_objects(), False)

    def empty_attributes(self):
        """Return bool numpy array filled with False values. Length of this
        array is equal to the number of attributes."""
        return np.full(self.number_of_attributes(), False)

    def clarificate(self):
        """In-place operation which clarifies the context.

        A clarified context does not have duplicate columns/rows. Only one of
        each set of equal columns/rows is kept.
        """
        # drop_duplicates is the pandas function for removing duplicated rows
        self.data_frame = self.data_frame.drop_duplicates()
        # duplicate columns are found with the helper based on the
        # Stack Overflow snippet
        duplicated_columns = self._duplicate_columns(self.data_frame)
        self.data_frame = self.data_frame.drop(duplicated_columns, axis=1)

    def up(self, objects, out=None):
        """Find all attributes covered by the objects.

        Parameters
        ----------
        objects : :py:class:`numpy.array`
            Numpy bool array; where an index contains True, the object with
            that index is included in the selection.
        out : :py:class:`numpy.array`
            If the value is None (default) the output is written to a newly
            allocated array, otherwise the provided array is used.

        Returns
        -------
        attributes : :py:class:`numpy.array`
            NumPy array with boolean values representing whether the
            attribute on that index is part of the result. For an empty
            selection every attribute is True.
        """
        return np.logical_and.reduce(self.data[objects], out=out)

    def down(self, attributes, out=None):
        """Find all objects covered by the attributes.

        Parameters
        ----------
        attributes : :py:class:`numpy.array`
            Numpy bool array; where an index contains True, the attribute
            with that index is included in the selection.
        out : :py:class:`numpy.array`
            If the value is None (default) the output is written to a newly
            allocated array, otherwise the provided array is used.

        Returns
        -------
        objects : :py:class:`numpy.array`
            NumPy array with boolean values representing whether the object
            on that index is part of the result. For an empty selection
            every object is True.
        """
        return np.logical_and.reduce(
            self.data[:, attributes], axis=1, out=out)