-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d8c59dd
commit a5e57e2
Showing
1 changed file
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
import gudhi | ||
import numpy as np | ||
from numpy import matlib | ||
import random | ||
import gensim | ||
from gensim.models import Word2Vec | ||
import scipy.sparse | ||
|
||
# Some functions that will be useful for the rest of the code
|
||
|
||
def signed_faces(s):
    """Return the codimension-1 faces of simplex ``s`` with alternating signs.

    Args:
        s: A simplex given as a sliceable sequence of vertices (list or tuple).

    Returns:
        A list of ``(sign, face)`` pairs. The i-th face is ``s`` with its i-th
        vertex removed, and its sign is ``(-1) ** i``. An empty ``s`` yields an
        empty list.
    """
    return [((-1) ** i, s[:i] + s[i + 1:]) for i in range(len(s))]
|
||
|
||
def signed_cofaces(s, cplx):
    """Return all codimension-1 cofaces of simplex ``s`` with their signs.

    Args:
        s: A simplex given as a sequence of vertices.
        cplx: The complex containing ``s``. It must provide
            ``get_cofaces(simplex, codimension)`` yielding
            ``(simplex, value)`` pairs (presumably a ``gudhi.SimplexTree``;
            confirm against the caller).

    Returns:
        A list of ``(sign(s, coface), coface)`` pairs.
    """
    return [(sign(s, coface), coface) for coface, _ in cplx.get_cofaces(s, 1)]
|
||
|
||
def sign(s, t):
    """Return the sign of simplex ``s`` as a face of simplex ``t``.

    Args:
        s: The candidate face, as a sequence of vertices.
        t: The candidate coface, as a sequence of vertices.

    Returns:
        ``None`` when ``t`` does not have exactly one more vertex than ``s``.
        Otherwise ``(-1) ** i`` where ``i`` is the first position at which
        ``s`` and ``t`` differ, or ``(-1) ** len(s)`` when ``s`` is a prefix
        of ``t``.
    """
    if len(t) != len(s) + 1:
        return None

    # zip stops after len(s) items, which is every index the two share.
    for i, (vertex_s, vertex_t) in enumerate(zip(s, t)):
        if vertex_s != vertex_t:
            return (-1) ** i

    return (-1) ** len(s)
|
||
|
||
def hacky_get_idx(s, cplx):
    """Return the integer index stored as the filtration value of simplex ``s``.

    Args:
        s: A simplex given as a sequence of vertices.
        cplx: An object providing ``filtration(simplex)`` that returns a float.

    Returns:
        The filtration value of ``s`` converted to ``int``.

    Raises:
        AssertionError: If the stored filtration value is not integral
            (this includes ``inf``).
    """
    value = cplx.filtration(s)
    assert value.is_integer(), "filtration value does not encode an index"
    return int(value)
|
||
|
||
def assemble(cplx, k, scheme="uniform", laziness=None):
    """Assemble the random-walk transition matrix on the k-simplices of ``cplx``.

    Two k-simplices are neighbours when they share a codimension-1 face
    ("down") or a codimension-1 coface ("up"). Orientations are ignored.

    Side effect: the filtration values of ``cplx`` are overwritten. Each
    k-simplex receives its row/column index as a float, and every other
    simplex receives ``inf``.

    Args:
        cplx: The simplicial complex. It must provide ``num_simplices``,
            ``get_filtration``, ``assign_filtration``,
            ``initialize_filtration``, ``get_cofaces`` and ``filtration``
            (presumably a ``gudhi.SimplexTree``; confirm against the caller).
        k: Dimension of the simplices the walk lives on.
        scheme: One of ``"uniform"``, ``"uniform-lazy"`` or
            ``"uniform-multicount"``.
            * ``"uniform"``: equal weight on each distinct neighbour.
            * ``"uniform-multicount"``: neighbours are not de-duplicated, so a
              neighbour reached several ways appears several times in the
              input triplets.
            * ``"uniform-lazy"``: weight ``laziness`` on staying put, and
              ``1 - laziness`` shared equally among the other distinct
              neighbours.
        laziness: Required for ``"uniform-lazy"``; must lie in ``[0, 1]``.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(N, N)``, where ``N`` is the
        number of k-simplices.
    """
    # HACK: simplex indices are stored as filtration values because simplex
    # keys are not accessible through the GUDHI Python API.
    # 16777217 == 2**24 + 1; presumably this keeps every index exactly
    # representable as a float filtration value -- TODO confirm.
    assert cplx.num_simplices() < 16777217

    assert scheme in ["uniform", "uniform-lazy", "uniform-multicount"]
    if scheme == "uniform-lazy":
        assert laziness is not None
        assert 0 <= laziness <= 1

    # Materialise first: filtration values are reassigned in the loop below.
    simplices = [s for s, _ in cplx.get_filtration()]

    N = 0
    for s in simplices:
        if len(s) == k + 1:
            cplx.assign_filtration(s, float(N))
            N += 1
        else:
            cplx.assign_filtration(s, np.inf)

    cplx.initialize_filtration()

    row_inds = []
    col_inds = []
    data = []

    for s, i in cplx.get_filtration():
        # Relies on get_filtration() yielding simplices by increasing
        # filtration value: once a value >= N (i.e. inf) shows up, all
        # k-simplices have been processed.
        if i >= N:
            break

        assert i.is_integer()
        i = int(i)

        # uniform, uniform-lazy, uniform-multicount
        if scheme.startswith("uniform"):
            # Up-neighbours: faces of the cofaces of s.
            up = [
                u
                for _, t in signed_cofaces(s, cplx)
                for _, u in signed_faces(t)
            ]
            # Down-neighbours: cofaces of the faces of s.
            down = [
                u
                for _, t in signed_faces(s)
                for _, u in signed_cofaces(t, cplx)
            ]

            # Orientations are not considered, so only indices are kept.
            neigh_idxs = [hacky_get_idx(t, cplx) for t in down + up]
            if scheme != "uniform-multicount":
                neigh_idxs = list(set(neigh_idxs))

            if scheme == "uniform-lazy":
                if len(neigh_idxs) == 1:
                    # NOTE(review): if that single neighbour is s itself, the
                    # row holds only `laziness` and need not sum to 1.
                    probs = 0.0
                else:
                    num_self_neigh = sum(1 for j in neigh_idxs if j == i)
                    probs = (1.0 - laziness) / (len(neigh_idxs) - num_self_neigh)
            else:
                probs = 1.0 / len(neigh_idxs)

            for j in neigh_idxs:
                row_inds.append(i)
                col_inds.append(j)
                if scheme == "uniform-lazy" and j == i:
                    data.append(laziness)
                else:
                    data.append(probs)

    return scipy.sparse.csr_matrix((data, (row_inds, col_inds)), shape=(N, N))
|
||
|
||
def walk(smplx, walk_length, P):
    """Perform a single random walk of fixed length starting at ``smplx``.

    Args:
        smplx: Index of the starting simplex (a row index of ``P``).
        walk_length: Number of steps to take.
        P: Precomputed transition matrix, either a dense 2-D array or a
            SciPy sparse matrix. Row ``i`` holds the probabilities of moving
            from simplex ``i`` to every other simplex.

    Returns:
        A list of ``walk_length + 1`` simplex indices, beginning with
        ``smplx``. Every index appended by a step is a plain ``int``.
    """
    states = np.arange(P.shape[0])
    path = [smplx]
    for _ in range(walk_length):
        row = P[smplx]
        # A sparse row cannot be passed to np.random.choice, which needs a
        # 1-D dense probability vector.
        if scipy.sparse.issparse(row):
            row = row.toarray()
        probs = np.asarray(row, dtype=float).ravel()
        # int() keeps NumPy scalars out of the walk, so that str()/repr() of
        # the walk stays a plain list of integers.
        smplx = int(np.random.choice(states, size=1, p=probs)[0])
        path.append(smplx)
    return path
|
||
|
||
def RandomWalks(walk_length, number_walks, P, seed=None):
    """Perform ``number_walks`` random walks from every $k$-simplex.

    Args:
        walk_length: Number of steps in each walk.
        number_walks: Number of walks started at each simplex.
        P: Transition matrix; its number of rows is the number of simplices.
        seed: Optional seed for NumPy's global random state. When given, it is
            applied before any walk is generated, so both the walks and their
            final shuffled order are reproducible.

    Returns:
        A shuffled list of ``number_walks * P.shape[0]`` walks, each a list of
        simplex indices.
    """
    # Seed before generating walks: seeding only before the shuffle would
    # leave the walks themselves non-reproducible.
    if seed is not None:
        np.random.seed(seed)

    Walks = [
        walk(start, walk_length, P)
        for _ in range(number_walks)
        for start in range(P.shape[0])
    ]
    np.random.shuffle(Walks)
    return Walks
|
||
|
||
def save_random_walks(Walks, filename):
    """Append the walks to a text file, one walk per line.

    Each line holds the steps of one walk separated by ``", "``.

    Args:
        Walks: Iterable of walks, each an iterable of simplex indices.
        filename: Path of the file to append to (created if missing).
    """
    # str() per step (rather than str() of the whole list) keeps NumPy integer
    # scalars from being written with their repr, e.g. "np.int64(3)".
    with open(filename, "a") as handle:
        handle.writelines(
            ", ".join(str(step) for step in single_walk) + "\n"
            for single_walk in Walks
        )
|
||
|
||
def load_walks(filename):
    """Load precomputed random walks from a text file.

    The file is expected to hold one walk per line, as integers separated by
    ``", "``. Blank lines are skipped. If a line contains a ``"], ["``
    separator, only the segment before the first separator is used.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of walks, each a list of ``int``.

    Raises:
        ValueError: If a token on a line is not an integer.
    """
    walks = []
    with open(filename, "r") as handle:
        for line in handle:
            # Strip only the line terminator, so that a final line without a
            # trailing newline keeps its last character.
            line = line.rstrip("\r\n")
            if not line.strip():
                continue
            first_segment = line.split("], [")[0]
            walks.append([int(token) for token in first_segment.split(", ")])
    return walks
|
||
|
||
def Embedding(Walks, emb_dim, epochs=5, filename="k-simplex2vec_embedding.model"):
    """Embed the $k$-simplices with gensim's Word2Vec, trained on the walks.

    Each walk is treated as a sentence whose words are the string forms of
    its steps, with any spaces removed.

    Args:
        Walks: Sequence of walks, each a sequence of simplex indices.
        emb_dim: Dimension of the embedding vectors (``vector_size``).
        epochs: Number of training epochs.
        filename: Path the trained model is saved to.

    Returns:
        The trained ``Word2Vec`` model (also saved to ``filename``).
    """
    walks_str = [
        [str(step).replace(" ", "") for step in single_walk]
        for single_walk in Walks
    ]

    model = Word2Vec(
        walks_str,
        vector_size=emb_dim,
        window=3,
        min_count=0,
        sg=1,
        workers=1,
        epochs=epochs,
    )
    model.save(filename)
    return model