semopy



Module semopy.stats

Statistic and fit indices.

Expand source code
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Statistic and fit indices."""
from .utils import duplication_matrix
from scipy.stats import norm, chi2
from collections import namedtuple
from .optimizer import Optimizer
from .model_generalized_effects import ModelGeneralizedEffects
from .model_effects import ModelEffects
from .model_means import ModelMeans
from .model import Model
import pandas as pd
import numpy as np

# Per-parameter estimate summary: point estimate, standard error, z-score
# and the corresponding two-sided p-value (see gather_statistics).
ParameterStatistics = namedtuple('ParametersStatistics',
                                 ['value', 'se', 'zscore', 'pvalue'])
# Aggregate fit summary returned by gather_statistics. NOTE: as constructed
# there, 'ml' holds the final objective value (last_result.fun) and 'fun'
# holds the loglikelihood.
SEMStatistics = namedtuple('SEMStatistics', ['dof', 'ml', 'fun',
                                             'chi2', 'dof_baseline',
                                             'chi2_baseline', 'rmsea', 'cfi',
                                             'gfi', 'agfi', 'nfi', 'tli',
                                             'aic', 'bic', 'params'])


def get_baseline_model(model, data=None):
    """
    Retrieve the baseline model from a given model.

    The baseline model here is an independence model where all variables are
    considered to be independent with zero covariance. Only variances are
    estimated.

    Parameters
    ----------
    model : Model or str
        Model instance or a model description string.
    data : pd.DataFrame, optional
        Data, extracting from model will be attempted if no data provided.
        (It's assumed that model.load took place). The default is None.

    Returns
    -------
    mod : Model
        Baseline model.

    """
    if type(model) is str:
        mod = Model(model, baseline=True)
        # BUG FIX: "if data:" on a DataFrame raises ValueError (ambiguous
        # truth value of a DataFrame); compare against None explicitly.
        if data is not None:
            mod.load(data)
        return mod
    desc = model.description
    tp = type(model)
    tp_name = tp.__name__
    if tp_name not in ('Model', ):
        # Model subclasses accept an extra intercepts flag.
        mod = tp(desc, baseline=True, intercepts=model.intercepts)
    else:
        mod = tp(desc, baseline=True)
    try:
        if data is None:  # BUG FIX: was "if not data:" (same DataFrame issue).
            if hasattr(model, 'mx_data'):
                mx = model.mx_data
                vs = model.vars['observed']
                if hasattr(model, 'mx_g'):
                    # Append exogenous observed variables stored separately.
                    mx = np.append(mx, model.mx_g.T, axis=1)
                    vs = vs + model.vars['observed_exogenous']
                data = pd.DataFrame(data=mx, columns=vs)
                if hasattr(model, 'group'):
                    data[model.group] = model.group_data[0, :]
                if tp_name in ('ModelEffects',):
                    mod.load(data, group=model.group, k=model.passed_k)
                else:
                    mod.load(data)
            else:
                # No raw data available: fall back to the covariance matrix.
                data = pd.DataFrame(data=model.mx_cov,
                                    index=model.vars['observed'],
                                    columns=model.vars['observed'])
                mod.load(cov=data, n_samples=model.n_samples)
    except AttributeError:
        # Source model wasn't loaded; return an unloaded baseline model.
        pass
    return mod


def __get_chi2_base(model):
    """
    Calculate chi2 of the baseline model.

    Parameters
    ----------
    model : Model
        Model.

    Returns
    -------
    tuple
        chi2, dof of baseline model.

    """
    baseline = get_baseline_model(model)
    objective = model.last_result.name_obj
    base_kind = type(baseline).__name__
    if base_kind in ('ModelEffects', 'ModelGeneralizedEffects'):
        # REML2/FIML objectives are swapped for ones the baseline supports.
        objective = {'REML2': 'REML', 'FIML': 'ML'}.get(objective, objective)
    elif base_kind in ('ModelMeans', ):
        if objective == 'FIML':
            objective = 'ML'
    baseline.fit(obj=objective)
    return calc_chi2(baseline)[0], calc_dof(baseline)


def calc_gfi(model, chi2=None, chi2_base=None):
    """
    Calculate GFI (goodness-of-fit index).

    Parameters
    ----------
    model : Model
        Model.
    chi2 : float, optional
        chi2 statistic. The default is None.
    chi2_base : float, optional
        chi2 statistic for baseline model. The default is None.

    Returns
    -------
    float
        GFI, or np.nan if the baseline chi2 is zero.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if chi2_base is None:
        chi2_base = __get_chi2_base(model)[0]
    if chi2_base == 0:
        # Guard against division by zero (consistent with calc_nfi).
        return np.nan
    return 1 - chi2 / chi2_base


def calc_agfi(model, dof=None, dof_bs=None, gfi=None):
    """
    Calculate AGFI (adjusted goodness-of-fit index).

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom. The default is None.
    dof_bs : float, optional
        Degrees of freedom of baseline model. The default is None.
    gfi : float, optional
        GFI statistic. The default is None.

    Returns
    -------
    float
        AGFI, or np.nan for a saturated model (dof == 0).

    """
    if dof is None:
        dof = calc_dof(model)
    if dof == 0:
        # AGFI is undefined when there are no degrees of freedom.
        return np.nan
    if dof_bs is None:
        dof_bs = calc_dof(get_baseline_model(model))
    if gfi is None:
        gfi = calc_gfi(model)
    # GFI penalized by the ratio of baseline-to-model degrees of freedom.
    return 1 - dof_bs / dof * (1 - gfi)


def calc_dof(model):
    """
    Calculate degrees of freedom.

    Parameters
    ----------
    model : Model
        Model.

    Returns
    -------
    int
        DoF: number of unique sample covariance moments minus the number
        of free parameters.

    """
    n_obs = len(model.vars['observed'])
    sample_moments = n_obs * (n_obs + 1) // 2
    return sample_moments - len(model.param_vals)


def calc_nfi(model, chi2=None, chi2_base=None):
    """
    Calculate NFI (Normed Fit Index).

    Parameters
    ----------
    model : Model
        Model.
    chi2 : float, optional
        chi2 statistic. The default is None.
    chi2_base : float, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        NFI, or np.nan if the baseline chi2 is zero.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if chi2_base is None:
        chi2_base = __get_chi2_base(model)[0]
    if chi2_base == 0:
        # Avoid dividing by a zero baseline statistic.
        return np.nan
    return (chi2_base - chi2) / chi2_base


def calc_tli(model, dof=None, chi2=None, dof_base=None, chi2_base=None):
    """
    Calculate TLI (Tucker and Lewis Index).

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof_base : int, optional
        Degrees of freedom of baseline model. The default is None.
    chi2_base : float, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        TLI, or np.nan when either dof is zero.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if chi2_base is None or dof_base is None:
        chi2_base, dof_base = __get_chi2_base(model)
    if dof is None:
        dof = calc_dof(model)
    if dof == 0 or dof_base == 0:
        return np.nan
    ratio = chi2 / dof
    ratio_base = chi2_base / dof_base
    # Relative improvement of the chi2/dof ratio over the baseline model.
    return (ratio_base - ratio) / (ratio_base - 1)


def calc_cfi(model, dof=None, chi2=None, dof_base=None, chi2_base=None):
    """
    Calculate CFI (Comparative Fit Index).

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof_base : int, optional
        Degrees of freedom statistic of baseline model. The default is None.
    chi2_base : float, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        CFI.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    if chi2_base is None or dof_base is None:
        chi2_base, dof_base = __get_chi2_base(model)
    # Noncentrality (chi2 in excess of dof) relative to the baseline model.
    excess = chi2 - dof
    excess_base = chi2_base - dof_base
    return 1 - excess / excess_base


def calc_chi2(model, dof=None):
    """
    Calculate chi-square statistic.

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.

    Returns
    -------
    tuple
        chi2 statistic and p-value.

    """
    if dof is None:
        dof = calc_dof(model)
    result = model.last_result
    if result.name_obj == 'FIML':
        # NOTE(review): FIML objective value is divided (not multiplied)
        # by n — presumably it is already sample-scaled; confirm upstream.
        stat = result.fun / model.n_samples
    else:
        stat = model.n_samples * result.fun
    return stat, 1 - chi2.cdf(stat, dof)


def calc_chi2_sb(model, stat=None, dof=None):
    """
    Calculate scaled (Satorra-Bentler) chi2.

    Parameters
    ----------
    model : Model
        Model.
    stat : float, optional
        chi2 statistic. The default is None.
    dof : int, optional
        Degrees of freedom. The default is None.

    Returns
    -------
    float
        SB chi2.
    float
        p-value.

    """
    if stat is None:
        stat = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    sigma, (m, c) = model.calc_sigma()
    inds = np.triu_indices_from(sigma)
    # Jacobian of the unique elements of sigma w.r.t. model parameters.
    grad = np.array([g[inds].flatten(order='F')
                    for g in model.calc_sigma_grad(m, c)]).T
    sigma_inv = np.linalg.inv(sigma)
    k = np.kron(sigma_inv, sigma_inv)
    dup = duplication_matrix(sigma.shape[0])
    w = dup.T @ k @ dup / 2
    w = np.linalg.inv(w)
    t = w @ grad
    # Residual weight matrix used to compute the scaling factor.
    u = w - t @ np.linalg.inv(grad.T @ t) @ t.T
    if hasattr(model, 'mx_w'):
        c = np.einsum('ij,ji->', u, model.mx_w) / dof
    else:
        # BUG FIX: a leftover debugging "return t, grad, w, u" used to
        # short-circuit this branch, so the scaled statistic was never
        # produced; now both branches fall through to the scaling below.
        c = np.trace(u @ np.linalg.inv(w)) / dof
    stat /= c
    return stat, 1 - chi2.cdf(stat, dof)


def calc_rmsea(model, chi2=None, dof=None):
    """
    Calculate RMSEA statistic.

    Parameters
    ----------
    model : Model
        Model.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof : int, optional
        Degrees of freedom statistic. The default is None.

    Returns
    -------
    float
        RMSEA (clipped at zero when chi2 < dof).

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    if chi2 < dof:
        # Fit is better than expected by chance; RMSEA floors at zero.
        return 0
    noncentrality = chi2 / dof - 1
    return np.sqrt(noncentrality / (model.n_samples - 1))


def calc_likelihood(model):
    """
    Calculate loglikelihood.

    Parameters
    ----------
    model : Model
        Model.

    Returns
    -------
    float
        Loglikelihood.

    """
    # TODO: only the MLW objective is evaluated here regardless of how the
    # model was actually fit.
    params = model.param_vals
    return model.obj_mlw(params)


def calc_aic(model, lh=None):
    """
    Calculate Akaike's Information Criteria (AIC) statistic.

    Parameters
    ----------
    model : Model
        Model.
    lh : float, optional
        Loglikelihood. The default is None.

    Returns
    -------
    float
        AIC.

    """
    if lh is None:
        lh = calc_likelihood(model)
    n_params = len(model.param_vals)
    return 2 * (n_params - lh)


def calc_bic(model, lh=None):
    """
    Calculate Bayesian Information Criteria (BIC) statistic.

    Parameters
    ----------
    model : Model
        Model.
    lh : float, optional
        Loglikelihood. The default is None.

    Returns
    -------
    float
        BIC.

    """
    if lh is None:
        lh = calc_likelihood(model)
    n_params = len(model.param_vals)
    n_samples = model.n_samples
    return np.log(n_samples) * n_params - 2 * lh


def calc_se(model, information='expected', robust=False):
    """
    Calculate standard errors.

    Parameters
    ----------
    model : Model
        Model.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. The default is 'expected'.
    robust : bool, optional
        If True, then robust SE are computed instead. Robustness here means
        that MLR-esque sandwich correction is applied. The default is False.

    Returns
    -------
    np.ndarray
        Standard errors of model's active parameters (np.nan where the
        information matrix yields a negative variance).

    """
    if robust or information != 'expected':
        # "mult" rescales the numerically-differentiated Hessian: the
        # objective implementations drop different constant factors, so
        # each objective needs its own correction multiplier.
        mult = model.n_samples / 2
        if hasattr(model, 'last_result'):
            fun, grad = model.get_objective(model.last_result.name_obj)
            # BUG FIX: was "in ('FIML')" — a plain string, so "in" did a
            # substring test that also matched 'ML'; the one-element tuple
            # restores the intended exact comparison.
            if model.last_result.name_obj in ('FIML',):
                mult = 0.5
        else:
            try:
                fun, grad = model.get_objective('MatNorm')
            except KeyError:
                try:
                    fun, grad = model.get_objective('FIML')
                except KeyError:
                    fun, grad = model.get_objective('MLW')
                    mult = 1.0
    if information == 'expected':
        mx_inf = model.calc_fim(inverse=True)[1]
    else:
        # Observed information via numerical differentiation.
        from numdifftools import Gradient, Hessian
        if grad is None:
            hess = Hessian(fun)(model.param_vals)
        else:
            hess = Gradient(grad)(model.param_vals)
        mx_inf = np.linalg.pinv(hess) / mult
    if robust:
        # MLR-esque sandwich: FIM^-1 @ B @ FIM^-1.
        g = sum(np.outer(g, g) for g in model.grad_se_g(model.param_vals))
        mx_inf = mx_inf @ g @ mx_inf
    variances = mx_inf.diagonal().copy()
    # Slightly negative variances are numerical noise; clamp them to ~0.
    inds = (variances < 0) & (variances > -1e-1)
    variances[inds] = 1e-12
    variances[variances < 0] = np.nan  # So numpy won't throw a warning.
    return np.sqrt(variances)


def calc_zvals(model, std_errors=None, information='expected'):
    """
    Calculate Z-score.

    Parameters
    ----------
    model : Model
        Model.
    std_errors : list, optional
        Standard errors. The default is None.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. No use if std_errors are provided.
        The default is 'expected'.

    Returns
    -------
    list
        Z-scores.

    """
    if std_errors is None:
        std_errors = calc_se(model, information=information)
    values = list(model.param_vals)
    # The 1e-12 offset avoids division by zero for degenerate SEs.
    return [value / (se + 1e-12) for value, se in zip(values, std_errors)]


def calc_pvals(model, z_scores=None, information='expected'):
    """
    Calculate p-values.

    Parameters
    ----------
    model : Model
        Model.
    z_scores : list, optional
        Z-scores. The default is None.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. No use if std_errors are provided.
        The default is 'expected'.

    Returns
    -------
    list
        P-values.

    """
    if z_scores is None:
        z_scores = calc_zvals(model, information=information)

    def _two_sided(z):
        # Two-sided p-value under the standard normal distribution.
        return 2 * (1 - norm.cdf(abs(z)))

    return [_two_sided(z) for z in z_scores]


def calc_stats(model):
    """
    Calculate fit indices and other auxiliary statistics.

    Parameters
    ----------
    model : Model
        Fit model instance.

    Raises
    ------
    Exception
        Raised when the model has not been fit yet.

    Returns
    -------
    pd.DataFrame
        Fit indices and statistics (single 'Value' row).

    """
    if not hasattr(model, 'last_result'):
        # BUG FIX: the message used '' inside single quotes (string
        # concatenation, rendering "Cant") and misspelt "statistics".
        raise Exception("Can't gather statistics from model until it is "
                        "fit.")
    try:
        lh = calc_likelihood(model)
    except np.linalg.LinAlgError:
        # Singular covariance: report NaN loglikelihood instead of failing.
        lh = np.nan
    aic = calc_aic(model, lh)
    bic = calc_bic(model, lh)
    dof = calc_dof(model)
    chi2_base, dof_base = __get_chi2_base(model)
    chi2 = calc_chi2(model, dof)
    rmsea = calc_rmsea(model, chi2[0], dof)
    cfi = calc_cfi(model, dof, chi2[0], dof_base, chi2_base)
    gfi = calc_gfi(model, chi2[0], chi2_base)
    agfi = calc_agfi(model, dof, dof_base, gfi)
    nfi = calc_nfi(model, chi2[0], chi2_base)
    tli = calc_tli(model, dof, chi2[0], dof_base, chi2_base)
    d = {'DoF': dof, 'DoF Baseline': dof_base,
         'chi2': chi2[0], 'chi2 p-value': chi2[1], 'chi2 Baseline': chi2_base,
         'CFI': cfi, 'GFI': gfi, 'AGFI': agfi, 'NFI': nfi, 'TLI': tli,
         'RMSEA': rmsea, 'AIC': aic, 'BIC': bic, 'LogLik': lh}
    return pd.DataFrame(d, index=['Value'])


def gather_statistics(model, information='expected', robust=False):
    """
    Retrieve all statistics as specified in SEMStatistics structure.

    Needed for backwards-compatibility with semopy 1.+.

    Parameters
    ----------
    model : Model or Optimizer
        Model (an Optimizer is unwrapped to its model).
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. The default is 'expected'.
    robust : bool, optional
        If True, then robust SE are computed instead. Robustness here means
        that MLR-esque sandwich correction is applied. The default is False.

    Raises
    ------
    Exception
        Raised when the model has not been fit yet.

    Returns
    -------
    SEMStatistics
        Namedtuple containing all statistics available to semopy.

    """
    if isinstance(model, Optimizer):
        model = model.model
    if not hasattr(model, 'last_result'):
        # BUG FIX: the message used '' inside single quotes (string
        # concatenation, rendering "Cant") and misspelt "statistics".
        raise Exception("Can't gather statistics from model until it is "
                        "fit.")
    values = model.param_vals.copy()
    try:
        std_errors = calc_se(model, information=information, robust=robust)
    except np.linalg.LinAlgError:
        # Singular information matrix: fall back to NaN standard errors.
        std_errors = np.array([np.nan] * len(values))
    z_scores = calc_zvals(model, std_errors)
    pvalues = calc_pvals(model, z_scores)
    try:
        lh = calc_likelihood(model)
    except np.linalg.LinAlgError:
        lh = np.nan
    aic = calc_aic(model, lh)
    bic = calc_bic(model, lh)
    param_stats = [ParameterStatistics(val, std, ztest, pvalue)
                   for val, std, ztest, pvalue
                   in zip(values, std_errors, z_scores, pvalues)]
    dof = calc_dof(model)
    chi2_base, dof_base = __get_chi2_base(model)
    chi2 = calc_chi2(model, dof)
    rmsea = calc_rmsea(model, chi2[0], dof)
    cfi = calc_cfi(model, dof, chi2[0], dof_base, chi2_base)
    gfi = calc_gfi(model, chi2[0], chi2_base)
    agfi = calc_agfi(model, dof, dof_base, gfi)
    nfi = calc_nfi(model, chi2[0], chi2_base)
    tli = calc_tli(model, dof, chi2[0], dof_base, chi2_base)
    # NOTE: field order puts the objective value in 'ml' and the
    # loglikelihood in 'fun' (see SEMStatistics definition).
    return SEMStatistics(dof, model.last_result.fun, lh, chi2, dof_base,
                         chi2_base, rmsea, cfi, gfi, agfi, nfi, tli, aic, bic,
                         param_stats)

Functions

def calc_agfi(model, dof=None, dof_bs=None, gfi=None)

Calculate AGFI (adjusted goodness-of-fit index).

Parameters

model : Model
Model.
dof : int, optional
Degrees of freedom. The default is None.
dof_bs : float, optional
Degrees of freedom of baseline model. The default is None.
gfi : float, optional
GFI statistic. The default is None.

Returns

float
AGFI.
Expand source code
def calc_agfi(model, dof=None, dof_bs=None, gfi=None):
    """
    Calculate AGFI (adjusted goodness-of-fit index).

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom. The default is None.
    dof_bs : float, optional
        Degrees of freedom of baseline model. The default is None.
    gfi : float, optional
        GFI statistic. The default is None.

    Returns
    -------
    float
        AGFI.

    """
    if dof is None:
        dof = calc_dof(model)
    if dof == 0:
        return np.nan
    if dof_bs is None:
        base = get_baseline_model(model)
        dof_bs = calc_dof(base)
    if gfi is None:
        gfi = calc_gfi(model)
    return 1 - dof_bs / dof * (1 - gfi)  # k * (k + 1) / (2 * dof) * (1 - gfi)
def calc_aic(model, lh=None)

Calculate Akaike's Information Criteria (AIC) statistic.

Parameters

model : Model
Model.
lh : float, optional
Loglikelihood. The default is None.

Returns

float
AIC.
Expand source code
def calc_aic(model, lh=None):
    """
    Calculate Akaike's Information Criteria (AIC) statistic.

    Parameters
    ----------
    model : Model
        Model.
    lh : float, optional
        Loglikelihood. The default is None.

    Returns
    -------
    float
        AIC.

    """
    if lh is None:
        lh = calc_likelihood(model)
    return 2 * (len(model.param_vals) - lh)
def calc_bic(model, lh=None)

Calculate Bayesian Information Criteria (BIC) statistic.

Parameters

model : Model
Model.
lh : float, optional
Loglikelihood. The default is None.

Returns

float
BIC.
Expand source code
def calc_bic(model, lh=None):
    """
    Calculate Bayesian Information Criteria (BIC) statistic.

    Parameters
    ----------
    model : Model
        Model.
    lh : float, optional
        Loglikelihood. The default is None.

    Returns
    -------
    float
        BIC.

    """
    if lh is None:
        lh = calc_likelihood(model)
    k, n = len(model.param_vals), model.n_samples
    return np.log(n) * k - 2 * lh
def calc_cfi(model, dof=None, chi2=None, dof_base=None, chi2_base=None)

Calculate CFI (Comparative Fit Index).

Parameters

model : Model
Model.
dof : int, optional
Degrees of freedom statistic. The default is None.
chi2 : float, optional
chi2 statistic. The default is None.
dof_base : int, optional
Degrees of freedom statistic of baseline model. The default is None.
chi2_base : float, optional
chi2 statistic of baseline model. The default is None.

Returns

float
CFI.
Expand source code
def calc_cfi(model, dof=None, chi2=None, dof_base=None, chi2_base=None):
    """
    Calculate CFI (Comparative Fit Index).

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof_base : int, optional
        Degrees of freedom statistic of baseline model. The default is None.
    chi2_base : float, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        CFI.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    if chi2_base is None or dof_base is None:
        chi2_base, dof_base = __get_chi2_base(model)
    a = chi2 - dof
    b = chi2_base - dof_base
    return 1 - a / b
def calc_chi2(model, dof=None)

Calculate chi-square statistic.

Parameters

model : Model
Model.
dof : int, optional
Degrees of freedom statistic. The default is None.

Returns

tuple
chi2 statistic and p-value.
Expand source code
def calc_chi2(model, dof=None):
    """
    Calculate chi-square statistic.

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.

    Returns
    -------
    tuple
        chi2 statistic and p-value.

    """
    if dof is None:
        dof = calc_dof(model)
    if model.last_result.name_obj == 'FIML':
        stat = model.last_result.fun / model.n_samples
    else:
        stat = model.n_samples * model.last_result.fun
    return stat, 1 - chi2.cdf(stat, dof)
def calc_chi2_sb(model, stat=None, dof=None)

Calculate scaled (Satorra-Bentler) chi2.

Parameters

model : Model
Model.
stat : float, optional
chi2 statistic. The default is None.
dof : int, optional
Degrees of freedom. The default is None.

Returns

float
SB chi2.
float
p-value.
Expand source code
def calc_chi2_sb(model, stat=None, dof=None):
    """
    Calculate scaled (Satorra-Bentler) chi2.

    Parameters
    ----------
    model : Model
        Model.
    stat : float, optional
        chi2 statistic. The default is None.
    dof : int, optional
        Degrees of freedom. The default is None.

    Returns
    -------
    float
        SB chi2.
    float
        p-value.

    """
    if stat is None:
        stat = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    sigma, (m, c) = model.calc_sigma()
    inds = np.triu_indices_from(sigma)
    grad = np.array([g[inds].flatten(order='F')
                    for g in model.calc_sigma_grad(m, c)]).T
    sigma_inv = np.linalg.inv(sigma)
    k = np.kron(sigma_inv, sigma_inv)
    dup = duplication_matrix(sigma.shape[0])
    w = dup.T @ k @ dup / 2
    w = np.linalg.inv(w)
    t = w @ grad
    u = w - t @ np.linalg.inv(grad.T @ t) @ t.T
    if hasattr(model, 'mx_w'):
        c = np.einsum('ij,ji->', u, model.mx_w) / dof
    else:
        c = np.trace(u @ np.linalg.inv(w)) / dof
        return t, grad, w, u
    stat /= c
    return stat, 1 - chi2.cdf(stat, dof)
def calc_dof(model)

Calculate degrees of freedom.

Parameters

model : Model
Model.

Returns

int
DoF.
Expand source code
def calc_dof(model):
    """
    Calculate degrees of freedom.

    Parameters
    ----------
    model : Model
        Model.

    Returns
    -------
    int
        DoF.

    """
    p = len(model.vars['observed'])
    return p * (p + 1) // 2 - len(model.param_vals)
def calc_gfi(model, chi2=None, chi2_base=None)

Calculate GFI (goodness-of-fit index).

Parameters

model : model
Model.
chi2 : TYPE, optional
chi2 statistic. The default is None.
chi2_base : TYPE, optional
chi2 statistic for baseline model. The default is None.

Returns

float
GFI.
Expand source code
def calc_gfi(model, chi2=None, chi2_base=None):
    """
    Calculate GFI (goodness-of-fit index).

    Parameters
    ----------
    model : model
        Model.
    chi2 : TYPE, optional
        chi2 statistic. The default is None.
    chi2_base : TYPE, optional
        chi2 statistic for baseline model. The default is None.

    Returns
    -------
    float
        GFI.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if chi2_base is None:
        chi2_base = __get_chi2_base(model)[0]
    return 1 - chi2 / chi2_base
def calc_likelihood(model)

Calculate likelihood.

Parameters

model : Model
Model.

Returns

float
Loglikelihood.
Expand source code
def calc_likelihood(model):
    """
    Calculate likelihood.

    Parameters
    ----------
    model : Model
        Model.

    Returns
    -------
    float
        Loglikelihood.

    """
    # TODO
    return model.obj_mlw(model.param_vals)
def calc_nfi(model, chi2=None, chi2_base=None)

Calculate NFI (Normed Fit Index).

Parameters

model : Model
Model.
chi2 : float, optional
chi2 statistic. The default is None.
chi2_base : TYPE, optional
chi2 statistic of baseline model. The default is None.

Returns

float
NFI.
Expand source code
def calc_nfi(model, chi2=None, chi2_base=None):
    """
    Calculate NFI (Normed Fit Index).

    Parameters
    ----------
    model : Model
        Model.
    chi2 : float, optional
        chi2 statistic. The default is None.
    chi2_base : TYPE, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        NFI.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if chi2_base is None:
        chi2_base = __get_chi2_base(model)[0]
    if chi2_base == 0:
        return np.nan
    return (chi2_base - chi2) / chi2_base
def calc_pvals(model, z_scores=None, information='expected')

Calculate p-values.

Parameters

model : Model
Model.
z_scores : list, optional
Z-scores. The default is None.
information : str
If 'expected', expected Fisher information is used. Otherwise, observed information is employed. No use if std_errors are provided. The default is 'expected'.

Returns

list
P-values.
Expand source code
def calc_pvals(model, z_scores=None, information='expected'):
    """
    Calculate p-values.

    Parameters
    ----------
    model : Model
        Model.
    z_scores : list, optional
        Z-sores. The default is None.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. No use if std_errors are provided.
        The default is 'expected'.

    Returns
    -------
    list
        P-values.

    """
    if z_scores is None:
        z_scores = calc_zvals(model, information=information)
    return [2 * (1 - norm.cdf(abs(z))) for z in z_scores]
def calc_rmsea(model, chi2=None, dof=None)

Calculate RMSEA statistic.

Parameters

model : Model
Model.
chi2 : float, optional
chi2 statistic. The default is None.
dof : int, optional
Degrees of freedom statistic. The default is None.

Returns

float
RMSEA.
Expand source code
def calc_rmsea(model, chi2=None, dof=None):
    """
    Calculate RMSEA statistic.

    Parameters
    ----------
    model : Model
        Model.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof : int, optional
        Degrees of freedom statistic. The default is None.

    Returns
    -------
    float
        RMSEA.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if dof is None:
        dof = calc_dof(model)
    if chi2 < dof:
        return 0
    return np.sqrt((chi2 / dof - 1) / (model.n_samples - 1))
def calc_se(model, information='expected', robust=False)

Calculate standard errors.

Parameters

model : Model
Model.
information : str
If 'expected', expected Fisher information is used. Otherwise, observed information is employed. The default is 'expected'.
robust : bool, optional
If True, then robust SE are computed instead. Robustness here means that MLR-esque sandwich correction is applied. The default is False.

Returns

list
list of standard errors of model's active parameters.
Expand source code
def calc_se(model, information='expected', robust=False):
    """
    Calculate standard errors.

    Parameters
    ----------
    model : Model
        Model.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. The default is 'expected'.
    robust : bool, optional
        If True, then robust SE are computed instead. Robustness here means
        that MLR-esque sandwich correction is applied. The default is False.

    Returns
    -------
    list
        list of standard errors of model's active parameters.

    """
    if robust or information != 'expected':
        # An explanation of "mult" for a reader:
        # When I was writing code and deriving some formulas for different
        # objective functions, I was finding myself quiet often dropping
        # constants when they appeared to be not necessary. Sometimes I didn't.
        # It is a mess. Here I am taking that in account. Some may say that
        # that I'd better just get those goddamn constants back in place
        # and get rid of this clusterfuck, and they'd be right, but I am
        # currently too hesitant to do it myself. 
        # My sincere apologies.
        # Georgy
        mult = model.n_samples / 2
        if hasattr(model, 'last_result'):
            fun, grad = model.get_objective(model.last_result.name_obj)
            if model.last_result.name_obj in ('FIML'):
                mult = 0.5
        else:
            try:
                fun, grad = model.get_objective('MatNorm')
            except KeyError:
                try:
                    fun, grad = model.get_objective('FIML')
                except KeyError:
                    fun, grad = model.get_objective('MLW')
                    mult = 1.0
        
    if information == 'expected':
        mx_inf = model.calc_fim(inverse=True)[1]
    else:
        from numdifftools import Gradient, Hessian
        if grad is None:
            hess = Hessian(fun)(model.param_vals)
        else:
            hess = Gradient(grad)(model.param_vals)
        mx_inf = np.linalg.pinv(hess) / mult
    if robust:
        g = sum(np.outer(g,g) for g in model.grad_se_g(model.param_vals))
        mx_inf = mx_inf @ g @ mx_inf
    variances = mx_inf.diagonal().copy()
    inds = (variances < 0) & (variances > -1e-1)
    variances[inds] = 1e-12
    variances[variances < 0] = np.nan  # So numpy won't throw a warning.
    return np.sqrt(variances) 
def calc_stats(model)

Calculate fit indices and other auxiliary statistics.

Parameters

model : Model
Fit model instance.

Returns

pd.DataFrame
Fit indices and statistics.
Expand source code
def calc_stats(model):
    """
    Calculate fit indices and other auxiliary statistics.

    Parameters
    ----------
    model : Model
        Fit model instance.

    Raises
    ------
    Exception
        Raised when the model has not been fit yet (no `last_result`).

    Returns
    -------
    pd.DataFrame
        Fit indices and statistics (single row indexed 'Value').

    """
    if not hasattr(model, 'last_result'):
        # `last_result` is set by the optimizer after a successful fit.
        # (Original message used adjacent string literals 'Can''t' which
        # concatenated into "Cant", plus a "statitics" typo.)
        raise Exception("Can't gather statistics from model until it is fit.")
    try:
        lh = calc_likelihood(model)
    except np.linalg.LinAlgError:
        # Singular covariance matrix: likelihood is undefined here.
        lh = np.nan
    aic = calc_aic(model, lh)
    bic = calc_bic(model, lh)
    dof = calc_dof(model)
    chi2_base, dof_base = __get_chi2_base(model)
    chi2 = calc_chi2(model, dof)  # (statistic, p-value) pair
    rmsea = calc_rmsea(model, chi2[0], dof)
    cfi = calc_cfi(model, dof, chi2[0], dof_base, chi2_base)
    gfi = calc_gfi(model, chi2[0], chi2_base)
    agfi = calc_agfi(model, dof, dof_base, gfi)
    nfi = calc_nfi(model, chi2[0], chi2_base)
    tli = calc_tli(model, dof, chi2[0], dof_base, chi2_base)
    d = {'DoF': dof, 'DoF Baseline': dof_base,
         'chi2': chi2[0], 'chi2 p-value': chi2[1], 'chi2 Baseline': chi2_base,
         'CFI': cfi, 'GFI': gfi, 'AGFI': agfi, 'NFI': nfi, 'TLI': tli,
         'RMSEA': rmsea, 'AIC': aic, 'BIC': bic, 'LogLik': lh}
    return pd.DataFrame(d, index=['Value'])
def calc_tli(model, dof=None, chi2=None, dof_base=None, chi2_base=None)

Calculate TLI (Tucker and Lewis Index).

Parameters

model : Model
Model.
dof : int, optional
Degrees of freedom statistic. The default is None.
chi2 : float, optional
chi2 statistic. The default is None.
dof_base : int, optional
Degrees of freedom of baseline model. The default is None.
chi2_base : float, optional
chi2 statistic of baseline model. The default is None.

Returns

float
TLI.
Expand source code
def calc_tli(model, dof=None, chi2=None, dof_base=None, chi2_base=None):
    """
    Calculate TLI (Tucker and Lewis Index).

    Any statistic left as None is computed from `model` on demand.

    Parameters
    ----------
    model : Model
        Model.
    dof : int, optional
        Degrees of freedom statistic. The default is None.
    chi2 : float, optional
        chi2 statistic. The default is None.
    dof_base : int, optional
        Degrees of freedom of baseline model. The default is None.
    chi2_base : float, optional
        chi2 statistic of baseline model. The default is None.

    Returns
    -------
    float
        TLI, or NaN when either degrees-of-freedom count is zero.

    """
    if chi2 is None:
        chi2 = calc_chi2(model)[0]
    if dof_base is None or chi2_base is None:
        chi2_base, dof_base = __get_chi2_base(model)
    if dof is None:
        dof = calc_dof(model)
    if dof == 0 or dof_base == 0:
        # TLI is undefined for saturated models.
        return np.nan
    rel = chi2 / dof
    rel_base = chi2_base / dof_base
    return (rel_base - rel) / (rel_base - 1)
def calc_zvals(model, std_errors=None, information='expected')

Calculate Z-score.

Parameters

model : Model
Model.
std_errors : list, optional
Standard errors. The default is None.
information : str
If 'expected', expected Fisher information is used. Otherwise, observed information is employed. No use if std_errors are provided. The default is 'expected'.

Returns

list
Z-scores.
Expand source code
def calc_zvals(model, std_errors=None, information='expected'):
    """
    Calculate Z-score.

    Parameters
    ----------
    model : Model
        Model.
    std_errors : list, optional
        Standard errors. The default is None.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. No use if std_errors are provided.
        The default is 'expected'.

    Returns
    -------
    list
        Z-scores.

    """
    if std_errors is None:
        std_errors = calc_se(model, information=information)
    estimates = list(model.param_vals)
    # The tiny epsilon guards against division by zero for zero SEs.
    return [est / (se + 1e-12) for est, se in zip(estimates, std_errors)]
def gather_statistics(model, information='expected', robust=False)

Retrieve all statistics as specified in SEMStatistics structure.

Needed for backwards-compatibility with semopy 1.+. Parameters


model : Model
Model.
information : str
If 'expected', expected Fisher information is used. Otherwise, observed information is employed. The default is 'expected'.
robust : bool, optional
If True, then robust SE are computed instead. Robustness here means that MLR-esque sandwich correction is applied. The default is False.

Raises

Exception
Rises when no data is loaded into the model.

Returns

SEMStatistics
Namedtuple containing all statistics available to semopy.
Expand source code
def gather_statistics(model, information='expected', robust=False):
    """
    Retrieve all statistics as specified in SEMStatistics structure.

    Needed for backwards-compatibility with semopy 1.+.
    Parameters
    ----------
    model : Model
        Model.
    information : str
        If 'expected', expected Fisher information is used. Otherwise,
        observed information is employed. The default is 'expected'.
    robust : bool, optional
        If True, then robust SE are computed instead. Robustness here means
        that MLR-esque sandwich correction is applied. The default is False.

    Raises
    ------
    Exception
        Rises when no data is loaded into the model.

    Returns
    -------
    SEMStatistics
        Namedtuple containing all statistics available to semopy.

    """
    if isinstance(model, Optimizer):
        # Accept a semopy 1.+ Optimizer and unwrap its model.
        model = model.model
    if not hasattr(model, 'last_result'):
        # Fixed message: original adjacent literals 'Can''t' rendered "Cant".
        raise Exception("Can't gather statistics from model until it is fit.")
    values = model.param_vals.copy()
    try:
        std_errors = calc_se(model, information=information, robust=robust)
    except np.linalg.LinAlgError:
        # Information matrix is singular; report NaN standard errors.
        std_errors = np.array([np.nan] * len(values))
    z_scores = calc_zvals(model, std_errors)
    pvalues = calc_pvals(model, z_scores)
    try:
        lh = calc_likelihood(model)
    except np.linalg.LinAlgError:
        lh = np.nan
    aic = calc_aic(model, lh)
    bic = calc_bic(model, lh)
    paramStats = [ParameterStatistics(val, std, ztest, pvalue)
                  for val, std, ztest, pvalue
                  in zip(values, std_errors, z_scores, pvalues)]
    dof = calc_dof(model)
    chi2_base, dof_base = __get_chi2_base(model)
    chi2 = calc_chi2(model, dof)  # (statistic, p-value) pair
    rmsea = calc_rmsea(model, chi2[0], dof)
    cfi = calc_cfi(model, dof, chi2[0], dof_base, chi2_base)
    gfi = calc_gfi(model, chi2[0], chi2_base)
    agfi = calc_agfi(model, dof, dof_base, gfi)
    nfi = calc_nfi(model, chi2[0], chi2_base)
    tli = calc_tli(model, dof, chi2[0], dof_base, chi2_base)
    return SEMStatistics(dof, model.last_result.fun, lh, chi2, dof_base,
                         chi2_base, rmsea, cfi, gfi, agfi, nfi, tli, aic, bic,
                         paramStats)
def get_baseline_model(model, data=None)

Retrieve the baseline model from a given model.

Baseline model here is an independence model where all variables are considered to be independent with zero covariance. Only variances are estimated. Parameters


model : Model
Model.
data : pd.DataFrame, optional
Data, extracting from model will be attempted if no data provided. (It's assumed that model.load took place). The default is None.
cov : pd.DataFrame, optional
Covariance matrix can be provided in the absence of data. The default is None.

Returns

mod : Model
Baseline model.
Expand source code
def get_baseline_model(model, data=None):
    """
    Retrieve the baseline model from a given model.

    Baseline model here is an independence model where all variables are
    considered to be independent with zero covariance. Only variances are
    estimated.
    Parameters
    ----------
    model : Model or str
        Model instance or a model description string.
    data : pd.DataFrame, optional
        Data, extracting from model will be attempted if no data provided.
        (It's assumed that model.load took place). The default is None.

    Returns
    -------
    mod : Model
        Baseline model.

    """
    if type(model) is str:
        mod = Model(model, baseline=True)
        # BUGFIX: `if data:` raises ValueError for a DataFrame (its truth
        # value is ambiguous); test for presence with an identity check.
        if data is not None:
            mod.load(data)
        return mod
    desc = model.description
    tp = type(model)
    tp_name = tp.__name__
    if tp_name not in ('Model', ):
        # Model subclasses (ModelMeans, ModelEffects, ...) take `intercepts`.
        mod = tp(desc, baseline=True, intercepts=model.intercepts)
    else:
        mod = tp(desc, baseline=True)
    try:
        # BUGFIX: `if not data:` likewise raises on a DataFrame.
        if data is None:
            if hasattr(model, 'mx_data'):
                mx = model.mx_data
                vs = model.vars['observed']
                if hasattr(model, 'mx_g'):
                    # Append exogenous covariates stored apart from mx_data.
                    mx = np.append(mx, model.mx_g.T, axis=1)
                    vs = vs + model.vars['observed_exogenous']
                data = pd.DataFrame(data=mx, columns=vs)
                if hasattr(model, 'group'):
                    data[model.group] = model.group_data[0, :]
                if tp_name in ('ModelEffects',):
                    mod.load(data, group=model.group, k=model.passed_k)
                else:
                    mod.load(data)
            else:
                # No raw data: fall back to the stored covariance matrix.
                data = pd.DataFrame(data=model.mx_cov,
                                    index=model.vars['observed'],
                                    columns=model.vars['observed'])
                mod.load(cov=data, n_samples=model.n_samples)
    except AttributeError:
        # Model carries no loadable data; return the unloaded baseline model.
        pass
    return mod

Classes

class ParameterStatistics (value, se, zscore, pvalue)

ParametersStatistics(value, se, zscore, pvalue)

Ancestors

  • builtins.tuple

Instance variables

var pvalue

Alias for field number 3

var se

Alias for field number 1

var value

Alias for field number 0

var zscore

Alias for field number 2

class SEMStatistics (dof, ml, fun, chi2, dof_baseline, chi2_baseline, rmsea, cfi, gfi, agfi, nfi, tli, aic, bic, params)

SEMStatistics(dof, ml, fun, chi2, dof_baseline, chi2_baseline, rmsea, cfi, gfi, agfi, nfi, tli, aic, bic, params)

Ancestors

  • builtins.tuple

Instance variables

var agfi

Alias for field number 9

var aic

Alias for field number 12

var bic

Alias for field number 13

var cfi

Alias for field number 7

var chi2

Alias for field number 3

var chi2_baseline

Alias for field number 5

var dof

Alias for field number 0

var dof_baseline

Alias for field number 4

var fun

Alias for field number 2

var gfi

Alias for field number 8

var ml

Alias for field number 1

var nfi

Alias for field number 10

var params

Alias for field number 14

var rmsea

Alias for field number 6

var tli

Alias for field number 11