""" A suite of functions that assist in carrying out the event-detection analysis ============================================================================= !! Please import as a module and use it as such -- not as a script !! """ # Created: Thu May 26, 2016 03:50PM # Last modified: Thu Jan 11, 2018 02:26PM # Copyright: Bedartha Goswami import numpy as np from scipy.stats import percentileofscore as percentile from networks import link_density from utils import _printmsg from utils import _progressbar_start from utils import _progressbar_update from utils import _progressbar_finish def community_strength(G): """ Returns intra-community link fraction for partition in two equal parts. Traditionally, modularity is estimated as the difference between the total fraction of links that fall within communities and the expectation value of the same quantity from a random network with the same degree sequence. The quantity returned by this fraction is simply the first term in the traditional definition of modularity. However, this is estimated only for one specific partition of the considered (recurrence) network, viz. the partition at the midpoint of time-ordering of the network which divides the entire graph into two subgraphs of equal size. Parameters ---------- G : igraph Graph An igraph Graph object that can be weighted or unweighted, but is not directed, for which the link density is to be estimated. Returns ------- rho : scalar, float The estimated total fraction of links that fall within the two communities of the recurrence network G. See Also -------- igraph.Graph.density modularity """ n = G.vcount() tot_possible_edges = 0.5 * float(n * (n - 1)) G1, G2 = __divide_at_midpoint(G) if G.is_weighted(): edges1, edges2 = np.sum(G1.es["weight"]), np.sum(G2.es["weight"]) else: edges1, edges2 = G1.ecount(), G2.ecount() rho = float(edges1 + edges2) / tot_possible_edges return rho def __divide_at_midpoint(G): """ Returns subgraphs by cutting the recurrence network at the midpoint. """ n = G.vcount() inds1, inds2 = range(0, int(n / 2)), range(int(n / 2), n) G1, G2 = G.subgraph(G.vs[inds1]), G.subgraph(G.vs[inds2]) return G1, G2 def community_strength_data(G, time, wsize, wstep, verbose=False, pbar=False): """ Saves intra-community link fraction for specified data set. """ _printmsg("COMMUNITY STRENGTH OF OBSERVED DATA", verbose) _printmsg("===================================", verbose) _printmsg("Loading data...", verbose) n = G.vcount() nwind = ((n - wsize) / wstep) + 1 tmid = [] Q, LD = [np.zeros(int(nwind)) for i in range(2)] _printmsg("Estimating intra-community link fraction...", verbose) prog_bar = _progressbar_start(n, pbar) for i in range(int(nwind)): k = i * wstep start, stop = k, k + wsize t_ = (time[start:stop]).sum() / wsize tmid.append(t_) G_ = G.subgraph(G.vs[start:stop]) Q[i] = community_strength(G_) LD[i] = link_density(G_) _progressbar_update(prog_bar, i) _progressbar_finish(prog_bar) return Q, tmid def community_strength_random_model(G, time, wsize, wstep, nsurr, verbose=False, pbar=False): """ Saves intra-community link fraction for surrogates of specified data set. """ _printmsg("COMMUNITY STRENGTH OF RANDOM MODEL", verbose) _printmsg("==================================", verbose) _printmsg("Loading data...", verbose) n = G.vcount() nwind = ((n - wsize) / wstep) + 1 tmid = [] Qsurr, LDsurr = [np.zeros((int(nwind), int(nsurr)), "float") for i in range(2)] _printmsg("Estimating intra-community link fraction...", verbose) prog_bar = _progressbar_start(nwind * nsurr, pbar) count = 0 for i in range(int(nwind)): k = i * wstep start, stop = k, k + wsize t_ = (time[start:stop]).sum() / wsize tmid.append(t_) G_ = G.subgraph(G.vs[start:stop]) if G.density() > 0.: wts_ = G_.es["weight"] str_ = np.array(G_.strength(G_.vs, weights=wts_)) deg = np.round(str_).astype("int").tolist() for j in range(nsurr): if sum(deg) % 2 != 0: deg_nonzero = np.where(np.array(deg) > 0)[0] idx = np.random.randint(0, len(deg_nonzero)) idx = deg_nonzero[idx] deg[idx] = deg[idx] - 1 Gsurr_ = G_.Degree_Sequence(out=deg, method="simple") Qsurr[i, j] = community_strength(Gsurr_) LDsurr[i, j] = Gsurr_.density() count += 1 else: Qsurr[i] = np.nan LDsurr[i] = np.nan _progressbar_update(prog_bar, count) _progressbar_finish(prog_bar) return Qsurr, tmid, LDsurr def pvalues(Q, Qsurr, nwind, verbose=False, pbar=False): """ Returns the p-value of obtaining intra-community link density. """ _printmsg("Estimating p-value...", verbose) pvalue = np.zeros(nwind) prog_bar = _progressbar_start(nwind, pbar) for i in range(nwind): Qnull = Qsurr[i] pvalue[i] = 1. - percentile(Qnull, Q[i], kind="weak") / 100. _progressbar_update(prog_bar, i) _progressbar_finish(prog_bar) return pvalue def holm(pvals, alpha=0.05, corr_type="dunn"): """ Returns indices of p-values using Holm's method for multiple testing. """ n = len(pvals) sortidx = np.argsort(pvals) p_ = pvals[sortidx] j = np.arange(1, n + 1) if corr_type == "bonf": corr_factor = alpha / (n - j + 1) elif corr_type == "dunn": corr_factor = 1. - (1. - alpha) ** (1. / (n - j + 1)) try: idx = np.where(p_ <= corr_factor)[0][-1] idx = sortidx[:idx] except IndexError: idx = [] return idx