"""
A suite of functions that assist in carrying out the event-detection analysis
=============================================================================

!! Please import as a module and use it as such -- not as a script !!

"""
# Created: Thu May 26, 2016  03:50PM
# Last modified: Thu Jan 11, 2018  02:26PM
# Copyright: Bedartha Goswami <goswami@pik-potsdam.de>

import numpy as np
from scipy.stats import percentileofscore as percentile

from networks import link_density
from utils import _printmsg
from utils import _progressbar_start
from utils import _progressbar_update
from utils import _progressbar_finish


def community_strength(G):
    """
    Returns intra-community link fraction for partition in two equal parts.

    Traditionally, modularity is estimated as the difference between the
    total fraction of links that fall within communities and the expectation
    value of the same quantity from a random network with the same degree
    sequence. The quantity returned by this function is simply the first
    term in the traditional definition of modularity. However, this is
    estimated only for one specific partition of the considered (recurrence)
    network, viz. the partition at the midpoint of time-ordering of the
    network which divides the entire graph into two subgraphs of equal size.

    Parameters
    ----------
    G : igraph Graph
        An igraph Graph object that can be weighted or unweighted, but is
        not directed, for which the intra-community link fraction is to be
        estimated.

    Returns
    -------
    rho : scalar, float
        The estimated total fraction of links that fall within the two
        communities of the recurrence network G.

    See Also
    --------
    igraph.Graph.density
    modularity

    """
    n = G.vcount()
    tot_possible_edges = 0.5 * float(n * (n - 1))
    G1, G2 = __divide_at_midpoint(G)
    if G.is_weighted():
        edges1, edges2 = np.sum(G1.es["weight"]), np.sum(G2.es["weight"])
    else:
        edges1, edges2 = G1.ecount(), G2.ecount()
    rho = float(edges1 + edges2) / tot_possible_edges
    return rho
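
# A minimal usage sketch for community_strength (the Erdos-Renyi graph below
# is only a stand-in for a recurrence network; any undirected igraph Graph
# works):
#
#     import igraph as ig
#     G = ig.Graph.Erdos_Renyi(n=100, p=0.1)
#     rho = community_strength(G)    # fraction of links within the two halves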


def __divide_at_midpoint(G):
    """
    Returns subgraphs by cutting the recurrence network at the midpoint.
    """
    n = G.vcount()
    inds1, inds2 = range(0, n // 2), range(n // 2, n)
    G1, G2 = G.subgraph(G.vs[inds1]), G.subgraph(G.vs[inds2])
    return G1, G2


def community_strength_data(G, time, wsize, wstep,
                            verbose=False, pbar=False):
    """
    Returns the intra-community link fraction and link density in sliding
    windows over the specified data set.
    """
    _printmsg("COMMUNITY STRENGTH OF OBSERVED DATA", verbose)
    _printmsg("===================================", verbose)
    _printmsg("Loading data...", verbose)
    n = G.vcount()
    nwind = ((n - wsize) // wstep) + 1    # number of sliding windows
    tmid = []
    Q, LD = [np.zeros(nwind) for i in range(2)]
    _printmsg("Estimating intra-community link fraction...", verbose)
    prog_bar = _progressbar_start(nwind, pbar)
    for i in range(nwind):
        k = i * wstep
        start, stop = k, k + wsize
        t_ = (time[start:stop]).sum() / wsize    # mean time of the window
        tmid.append(t_)
        G_ = G.subgraph(G.vs[start:stop])
        Q[i] = community_strength(G_)
        LD[i] = link_density(G_)
        _progressbar_update(prog_bar, i)
    _progressbar_finish(prog_bar)
    return Q, tmid, LD
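
# A minimal usage sketch (hypothetical window parameters; time is assumed to
# be a NumPy array with one time stamp per vertex of the recurrence network):
#
#     Q, tmid, LD = community_strength_data(G, time, wsize=200, wstep=20)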


def community_strength_random_model(G, time, wsize, wstep, nsurr,
                                    verbose=False, pbar=False):
    """
    Returns the windowed intra-community link fraction and link density for
    random-graph surrogates of the specified data set.
    """
    _printmsg("COMMUNITY STRENGTH OF RANDOM MODEL", verbose)
    _printmsg("==================================", verbose)
    _printmsg("Loading data...", verbose)
    n = G.vcount()
    nwind = ((n - wsize) // wstep) + 1    # number of sliding windows
    tmid = []
    Qsurr, LDsurr = [np.zeros((nwind, nsurr)) for i in range(2)]
    _printmsg("Estimating intra-community link fraction...", verbose)
    prog_bar = _progressbar_start(nwind * nsurr, pbar)
    count = 0
    for i in range(nwind):
        k = i * wstep
        start, stop = k, k + wsize
        t_ = (time[start:stop]).sum() / wsize
        tmid.append(t_)
        G_ = G.subgraph(G.vs[start:stop])
        if G_.density() > 0.:
            # round the weighted node strengths to an integer degree
            # sequence for the random-graph null model
            wts_ = G_.es["weight"]
            str_ = np.array(G_.strength(G_.vs, weights=wts_))
            deg = np.round(str_).astype("int").tolist()
            for j in range(nsurr):
                # a graphical degree sequence needs an even degree sum;
                # if odd, decrement one randomly chosen nonzero entry
                if sum(deg) % 2 != 0:
                    deg_nonzero = np.where(np.array(deg) > 0)[0]
                    idx = np.random.randint(0, len(deg_nonzero))
                    idx = deg_nonzero[idx]
                    deg[idx] = deg[idx] - 1
                Gsurr_ = G_.Degree_Sequence(out=deg, method="simple")
                Qsurr[i, j] = community_strength(Gsurr_)
                LDsurr[i, j] = Gsurr_.density()
                count += 1
        else:
            # window without edges: surrogates are undefined
            Qsurr[i] = np.nan
            LDsurr[i] = np.nan
            count += nsurr
        _progressbar_update(prog_bar, count)
    _progressbar_finish(prog_bar)
    return Qsurr, tmid, LDsurr
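
# A minimal usage sketch (hypothetical parameters; note that G must carry
# edge weights, since the surrogates are built from rounded node strengths):
#
#     Qsurr, tmid, LDsurr = community_strength_random_model(
#         G, time, wsize=200, wstep=20, nsurr=1000)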


def pvalues(Q, Qsurr, nwind, verbose=False, pbar=False):
    """
    Returns the p-values of the observed intra-community link fractions
    under the surrogate null model.
    """
    _printmsg("Estimating p-value...", verbose)
    pvalue = np.zeros(nwind)
    prog_bar = _progressbar_start(nwind, pbar)
    for i in range(nwind):
        Qnull = Qsurr[i]
        # fraction of surrogates with community strength >= the observed
        # value, converted to a right-tail p-value
        pvalue[i] = 1. - percentile(Qnull, Q[i], kind="weak") / 100.
        _progressbar_update(prog_bar, i)
    _progressbar_finish(prog_bar)
    return pvalue
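
# A minimal usage sketch, comparing the observed and surrogate values from
# the two functions above:
#
#     p = pvalues(Q, Qsurr, nwind=len(Q))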


def holm(pvals, alpha=0.05, corr_type="dunn"):
    """
    Returns indices of the p-values deemed significant by Holm's step-down
    method for multiple testing.
    """
    pvals = np.asarray(pvals)
    n = len(pvals)
    sortidx = np.argsort(pvals)
    p_ = pvals[sortidx]
    j = np.arange(1, n + 1)
    if corr_type == "bonf":
        # Holm-Bonferroni step-down thresholds
        corr_factor = alpha / (n - j + 1)
    elif corr_type == "dunn":
        # Holm-Šidák (Dunn-Šidák) step-down thresholds
        corr_factor = 1. - (1. - alpha) ** (1. / (n - j + 1))
    try:
        # last sorted p-value below its threshold; reject it and all
        # smaller p-values
        idx = np.where(p_ <= corr_factor)[0][-1]
        idx = sortidx[:idx + 1]
    except IndexError:
        idx = []
    return idx
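
# A minimal usage sketch: indices of windows that remain significant after
# the multiple-testing correction:
#
#     significant = holm(p, alpha=0.05, corr_type="dunn")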