Source code for fca.context

import numpy as np
from numpy import array_equiv


class Context:
    """Implements Formal Context.

    A formal context can be understood as a triplet :math:`<A,B,I>` where
    :math:`A` are objects, :math:`B` are attributes and :math:`I` is a
    relation :math:`A` x :math:`B`. If :math:`<x1, y2>` is from :math:`I`,
    that means that object x1 has attribute y2.

    Parameters
    ----------
    data_frame : :py:class:`pandas.DataFrame`
        Context uses a :py:class:`pandas.DataFrame` as the inner
        representation of relation :math:`I`. Its index and columns
        (:py:class:`pandas.Index`) represent :math:`A` and :math:`B`.
        Data should include only True/False values, or it can be scaled
        with :py:func:`pandas.get_dummies` to multiple columns with 0/1
        values.

    Attributes
    ----------
    data_frame : :py:class:`pandas.DataFrame`
        Complete dataset related to formal context.
    data : :py:class:`numpy.ndarray`
        ``data_frame.values``; refreshed whenever ``data_frame`` is assigned.
    objects : :py:class:`pandas.Index`
        All objects from context (the index of ``data_frame``).
    attributes : :py:class:`pandas.Index`
        All attributes from context (the columns of ``data_frame``).

    Raises
    ------
    ValueError
        If the index or the columns of the data frame contain duplicated
        labels.
    """

    def __init__(self, data_frame):
        self._df = None
        # Goes through the property setter, which validates the frame and
        # fills ``data``, ``objects`` and ``attributes``.
        self.data_frame = data_frame

    def __str__(self):
        return str(self.data_frame)

    def __repr__(self):
        return repr(self.data_frame)

    def __eq__(self, other):
        """Two contexts are equal when their data frames are equal."""
        if not isinstance(other, Context):
            # Let Python fall back to its default handling, so comparing
            # with an unrelated object yields False instead of raising
            # AttributeError on the missing ``data_frame``.
            return NotImplemented
        return self.data_frame.equals(other.data_frame)

    def _duplicate_columns(self, frame):
        """Return labels of columns that are repeated by a later column.

        A column is reported when some column to its right has the same
        dtype and equivalent values, so for every group of identical
        columns only the last one is left unreported.

        Based on a snippet from user kalu on
        https://stackoverflow.com/a/32961145
        """
        labels = frame.columns
        dtypes = frame.dtypes
        count = len(labels)
        # Extract each column's values once instead of once per comparison.
        columns = [frame.iloc[:, pos].values for pos in range(count)]
        duplicates = []
        for left in range(count):
            for right in range(left + 1, count):
                same_dtype = dtypes.iloc[left] == dtypes.iloc[right]
                if same_dtype and array_equiv(columns[left], columns[right]):
                    duplicates.append(labels[left])
                    break
        return duplicates

    @property
    def data_frame(self):
        return self._df

    @data_frame.setter
    def data_frame(self, value):
        if value.index.duplicated().any():
            raise ValueError("Duplicity in name of the objects")
        if value.columns.duplicated().any():
            raise ValueError("Duplicity in name of the attributes")
        self._df = value
        # Cached views used by the other methods of the class.
        self.data = self._df.values
        self.objects = self._df.index
        self.attributes = self._df.columns

    def number_of_objects(self):
        """Return number of objects."""
        return len(self.objects)

    def number_of_attributes(self):
        """Return number of attributes."""
        return len(self.attributes)

    def all_objects(self):
        """Return bool numpy array filled with True values.

        Length of this array is equal to the number of objects.
        """
        return np.full(self.number_of_objects(), True)

    def all_attributes(self):
        """Return bool numpy array filled with True values.

        Length of this array is equal to the number of attributes.
        """
        return np.full(self.number_of_attributes(), True)

    def empty_objects(self):
        """Return bool numpy array filled with False values.

        Length of this array is equal to the number of objects.
        """
        return np.full(self.number_of_objects(), False)

    def empty_attributes(self):
        """Return bool numpy array filled with False values.

        Length of this array is equal to the number of attributes.
        """
        return np.full(self.number_of_attributes(), False)

    def clarificate(self):
        """In-place operation which clarifies the context.

        A clarified context is a context which does not have duplicate
        columns/rows. Only one of these columns/rows is kept.
        """
        # drop_duplicates is the pandas function for removing duplicated rows.
        self.data_frame = self.data_frame.drop_duplicates()
        # Duplicate columns are found by the pairwise comparison helper.
        duplicated_columns = self._duplicate_columns(self.data_frame)
        self.data_frame = self.data_frame.drop(duplicated_columns, axis=1)

    def up(self, objects, out=None):
        """Find all attributes covered by the objects.

        Parameters
        ----------
        objects : :py:class:`numpy.array`
            Numpy bool array; if an index contains a True value, the object
            with this index is included in the selection.
        out : :py:class:`numpy.array`
            If value is None (default) the output is written to a newly
            allocated array, otherwise the provided array is used.

        Returns
        -------
        attributes : :py:class:`numpy.array`
            NumPy array with boolean values representing if the attribute
            on that index is part of the result.
        """
        return np.logical_and.reduce(self.data[objects], axis=0, out=out)

    def down(self, attributes, out=None):
        """Find all objects covered by the attributes.

        Parameters
        ----------
        attributes : :py:class:`numpy.array`
            Numpy bool array; if an index contains a True value, the
            attribute with this index is included in the selection.
        out : :py:class:`numpy.array`
            If value is None (default) the output is written to a newly
            allocated array, otherwise the provided array is used.

        Returns
        -------
        objects : :py:class:`numpy.array`
            NumPy array with boolean values representing if the object on
            that index is part of the result.
        """
        return np.logical_and.reduce(self.data[:, attributes], axis=1, out=out)