diff --git a/geneRegulation/analyseExprs.py b/geneRegulation/analyseExprs.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b739364426e6b1f4aaadf1b186358c0ade287c --- /dev/null +++ b/geneRegulation/analyseExprs.py @@ -0,0 +1,556 @@ +from __future__ import print_function +from collections import defaultdict +from operator import add +from functools import reduce +from itertools import combinations, permutations +from pprint import pprint +import numpy as np +import networkx as nx +import seg + +geneNms = [ + 'AP3', + 'REV', + 'SUP', + 'AHP6', + 'PHB_PHV', + 'SEP1_2', + 'PI', + 'LFY', + 'AS1', + 'AG', + 'ATML1', + 'STM', + 'AP1_2', + 'L2', + 'L1', + 'ETTIN', + 'SEP3', + 'FIL', + 'WUS', + 'PUCHI', + 'ANT', + 'CUC1_2_3', + 'MP', + 'CLV3', + ] + +geneFs = { + 'AUX': 'growth', + 'AP3': 'identity', + 'REV': 'polarity', + 'SUP': 'meristem', + 'AHP6': 'growth', + 'PHB_PHV': 'polarity', + 'SEP1_2': 'identity', + 'PI': 'identity', + 'LFY': 'growth', + 'AS1': 'polarity', + 'AG': 'identity', + 'STM' : 'meristem', + 'AP1_2': 'identity', + 'ETTIN': 'polarity', + 'SEP3' : 'identity', + 'FIL' : 'polarity', + 'WUS': 'meristem', + 'PUCHI' : 'growth', + 'ANT' : 'growth', + 'CUC1_2_3': 'meristem', + 'MP' : 'growth', + 'CLV3': 'meristem', + 'KAN1': 'polarity', + 'CLV1': 'meristem' + } + +class Interaction(object): + def __init__(self, inputs, out, w): + self.inputs = inputs + self.out = out + self.w = w + self.tt = None + + def __repr__(self): + inputsRepr = ",".join(self.inputs) + + return inputsRepr + " --> " + self.out + + def _getExprs(self, c): + inputExprs = tuple([c.exprs[i] for i in self.inputs]) + outExpr = c.exprs[self.out] + + return (inputExprs, outExpr) + + def mkTruthTable(self, cells): + #get truth table per interaction, which can then be compared to + #known truth table to get the interactions rules + ttable = set(self._getExprs(c) for c in cells) + self.tt = ttable + return ttable + + def partOf(self, i, o): + return i in self.inputs and self.out == o + + def toPairs(self): + return [(i, self.out) for i in self.inputs] + + def toWPairs(self): + return [(i, self.out, {'weight':self.w, 'tt':self.tt}) for i in self.inputs] + + +def readDataT(d, dExprs, t): + from os.path import join + + geomF = "_segmented_tvformat_0_volume_position.txt" + neighF = "_segmented_tvformat_0_neighbors_py.txt" + exprF = "h.txt" + + fnGeom = join(d, "t" + str(t) + geomF) + fnNeigh = join(d, "t" + str(t) + neighF) + fnExpr = join(dExprs, "t_" + str(t) + exprF) + + gexprs = seg.readExprs(fnExpr) + ts = seg.STissue.fromTV(fnGeom, fnNeigh, gexprs, seg.geneNms) + + return list(iter(ts)) + +def readDataT1(d, dExprs, t): + from os.path import join + + geomF = "_segmented_tvformat_0_volume_position.txt" + neighF = "_segmented_tvformat_0_neighbors_py.txt" + exprF = "h.txt" + + fnGeom = join(d, "t" + str(t) + geomF) + fnNeigh = join(d, "t" + str(t) + neighF) + fnExpr = join(dExprs, "t_" + str(t) + exprF) + + gexprs = seg.readExprs(fnExpr) + ts = seg.STissue.fromTV(fnGeom, fnNeigh, gexprs, seg.geneNms) + + return ts + +def readDataT1Im(d, dExprs, t): + from os.path import join + + exprF = "h.txt" + fnRes = join(d, "resolutions.txt") + fnSeg = join(d, "segmentations/t"+str(t)+"_segmented.tif") + fnExpr = join(dExprs, "t_" + str(t) + exprF) + + + data, _ = seg.readImage(fnSeg) + ress = seg.readRess(fnRes) + res = ress[t] + + gexprs = seg.readExprs(fnExpr) + ts = seg.STissue.fromImage(data, res, gexprs, seg.geneNms) + + for c in ts: + c.swapZX() + + return ts + +def readData(d, dExprs): + timepoints = [10, 40, 96, 120, 132] + cells = [] + + for t in timepoints: + print(t) + cells.append(readDataT(d, dExprs, t)) + + return reduce(add, cells) + +def initPs(geneNms, k, val): + ps = dict() + for genes in combinations(geneNms, k): + ps[frozenset(genes)] = np.copy(val) + + return ps + +def initM(geneNms, outGs, k): + ps = dict() + for genes in combinations(geneNms, k): + ps[frozenset(genes)] = dict((g, 0.0) for g in outGs) + + return ps + +def ps(cells, km): + P = dict() + N = len(cells) + + for k in range(1, km+2): + P[k] = initPs(geneNms, k, np.zeros(tuple(2 for _ in range(k)))) + + for k in range(1, km+2): + for cell in cells: + for gks in combinations(geneNms, k): + counts = P[k][frozenset(gks)] + gkExprs = tuple([int(cell.exprs[g]) for g in list(gks)]) + counts[gkExprs] += 1 + + for k in range(1, km+2): + for gks in combinations(geneNms, k): + P[k][frozenset(gks)] /= N + + return P + +def psGk(cells, gks): + k = len(gks) + N = len(cells) + counts = np.zeros(tuple(2 for _ in range(k))) + + for cell in cells: + gkExprs = tuple([int(cell.exprs[g]) for g in list(gks)]) + counts[gkExprs] += 1 + + return (counts / N) + +def entropy(P): + return -np.sum(P * np.log(P, out=np.zeros_like(P), where=(P!=0))) + +def mkEntropies(P, km): + H = dict() + for k in range(1, km+2): + H[k] = initPs(geneNms, k, 0.0) + + for k in range(1, km+2): + for gks in combinations(geneNms, k): + H[k][frozenset(gks)] = entropy(P[k][frozenset(gks)]) + + return H + +def mutualInfo(H, outGs, k): + hOut = H[1] + hIn = H[k] + hJoint = H[k+1] + + M = initM(geneNms, outGs, k) + + for inputs in combinations(geneNms, k): + for out in outGs: + if not out in inputs: + inputOut = frozenset(inputs + (out, )) + M[frozenset(inputs)][out] = (hOut[frozenset([out])] + + hIn[frozenset(inputs)] + - hJoint[inputOut]) + + return M + +def getOuts(interactions): + return [intr.out for intr in interactions] + +def getInteractions(M, H, outGs, k, s): + hOut = H[1] + interactions = [] + + for inputs in combinations(geneNms, k): + for out in outGs: + if (not out in inputs + and ((M[frozenset(inputs)][out]/ hOut[frozenset([out])]) >= s) + and not hOut[frozenset([out])] == 0): + w = M[frozenset(inputs)][out]/ hOut[frozenset([out])] + interactions.append(Interaction(inputs, out, w)) + + return interactions + +def go(cells, kmax, s): + out = set(geneNms) + outDet = set([]) + + P = ps(cells, kmax) + H = mkEntropies(P, kmax) + + interactions = [] + + for k in range(1, kmax+1): + out = out.difference(outDet) + M = mutualInfo(H, out, k) + intrs = getInteractions(M, H, out, k, s) + interactions += intrs + outDet = getOuts(intrs) + + return (P, interactions) + +def printTTable(ttable): + ios = list(ttable) + + for (inputs, out) in ios: + print(",".join(str(int(i)) for i in inputs) + " --> " + str(int(out))) + + return + +def satTT(tt, ttIntr): + return all([(inputs, out) in tt for inputs, out in ttIntr]) + +def checkIntr(intr, cells): + idTT = {((False,), False), ((True, ), True)} + notTT = {((False, ), True), ((True,), False)} + + orTT = {((False, False), False), ((False, True), True), + ((True, False), True), ((True, True), True)} + xorTT = {((False, False), False), ((False, True), True), + ((True, False), True), ((True, True), False)} + andTT = {((False, False), False), ((False, True), False), + ((True, False), False), ((True, True), True)} + + ttNms = ['ID', 'NOT', 'OR', 'XOR', 'AND'] + tts = [idTT, notTT, orTT, xorTT, andTT] + ttIntr = intr.mkTruthTable(cells) + + return [ttNms[i] for i, tt in enumerate(tts) if satTT(tt, ttIntr)] + + +def printInteractions(interactions, cells): + for intr in interactions: + print(intr) + print(",".join(checkIntr(intr, cells))) + printTTable(intr.tt) + print() + + return + +def concatMap(xs, f): + return reduce(add, map(f, xs), []) + +def toGraph(interactions, cells): + import networkx as nx + + for intr in interactions: + intr.mkTruthTable(cells) + + G = nx.DiGraph() + G.add_edges_from(concatMap(interactions, lambda i: i.toWPairs())) + + return G + +def mkWGraphRef(interactions, ref, cells): + import networkx as nx + #draw the ref graph with weights from the data-extracted interactions + G = toGraph(interactions, cells) + + wRef = [] + for i, o in ref: + try: + wRef.append((i, o, G.adj[i][o]['weight'])) + except KeyError: + wRef.append((i, o, 0.0)) + + Gref = nx.DiGraph() + Gref.add_weighted_edges_from(wRef) + + return Gref + +def filterRef(geneNms, ref): + genes = set(geneNms) + + return [(i, o) for i, o in ref if i in genes and o in genes] + +def createRefData(geneNms, k): + return [(g1, g2) for g1, g2 in permutations(geneNms, k) if not g1 == g2] + +def drawGraph(G): + import networkx as nx + import matplotlib.pyplot as plt + import matplotlib as mpl + + pos = nx.layout.circular_layout(G) + + nodes = nx.draw_networkx_nodes(G, pos, node_size=700) + edges = nx.draw_networkx_edges(G, pos, width=2) + nx.draw_networkx_labels(G, pos, font_size=14, font_family='sans-serif') + + ax = plt.gca() + ax.set_axis_off() + plt.show() + +def drawWGraph(G, w0=False): + import networkx as nx + import matplotlib.pyplot as plt + import matplotlib as mpl + + ws = nx.get_edge_attributes(G, 'weight') + + if w0: + ews = [ws[e] for e in G.edges() if ws[e] > 0.0] + edgesToDraw = [e for e in G.edges() if ws[e] > 0.0] + else: + ews = [ws[e] for e in G.edges()] + edgesToDraw = G.edges() + + pos = nx.layout.circular_layout(G) + + print(ews) + print(edgesToDraw) + + nodes = nx.draw_networkx_nodes(G, pos, node_size=500, alpha=0.6) + edges = nx.draw_networkx_edges(G, pos, edge_list=edgesToDraw, edge_color=ews, + edge_cmap=plt.cm.Blues, width=2) + nx.draw_networkx_labels(G, pos, font_size=10, font_family='sans-serif') + + pc = mpl.collections.PatchCollection(edges, cmap=plt.cm.Blues) + pc.set_array(ews) + plt.colorbar(pc) + + ax = plt.gca() + ax.set_axis_off() + plt.show() + +def circularOrdered(nodes, centre=(0, 0), r=1): + #like nx circular positioning but preserves the order of the nodes + + N = len(nodes) + thetas = np.linspace(0, 2*np.pi, N+1) + + poss = np.zeros([N+1, 2]) + for (i, th) in enumerate(thetas): + a, b = centre + x = a + r*np.cos(th) + y = b + r*np.sin(th) + poss[i, :] = [x, y] + + return dict(zip(nodes, poss)) + +def getW(ws, p): + a, b = p + + try: + return ws[p] + except KeyError: + return ws[(b, a)] + + return + +def rescale(w, p): + d, h = p + + return w * (h - d) + d + +def inRef(intr, refComps): + return intr[0] in refComps or intr[1] in refComps + +def drawRefData(interactions, interactionsC, cells, ref, geneF, refOnly=False, engine='circo'): + import networkx as nx + import matplotlib.pyplot as plt + import matplotlib as mpl + + fColours = {'identity': '#FCFFE5', + 'growth': '#8BB9FF', + 'polarity': '#FFE1BB', + 'meristem': '#ED9EAE', + '': '#FFFFFF'} + + sref = set(ref) + Gref = mkWGraphRef(interactions, ref, cells) + + newIntrs = [] + for intr in interactionsC: + if (any ([not bintr in sref and inRef(bintr, Gref.nodes()) + for bintr in intr.toPairs()])): + newIntrs.append(intr) + + nIntrPairs = concatMap(newIntrs, lambda i: i.toPairs()) + Gref.add_edges_from(nIntrPairs) + + ws = nx.get_edge_attributes(Gref, 'weight') + ews = [rescale(getW(ws, e), (0.0, 1.0)) for e in ref] + + ncs = [fColours[geneF.get(g, '')] for g in Gref.nodes()] + nodesF = map(lambda x: x[0], + sorted([(n, geneF.get(n, '')) for n in Gref.nodes()], + key=lambda p: p[1])) + pos = nx.nx_agraph.graphviz_layout(Gref, prog=engine)# + + nodes = nx.draw_networkx_nodes(Gref, pos, node_color=ncs, + alpha=0.9, node_size=700) + if not refOnly: + nx.draw_networkx_edges(Gref, pos, width=1, edge_color='r', + edgelist=nIntrPairs) + wedges = nx.draw_networkx_edges(Gref, pos, width=1, edgelist=ref, + edge_cmap=plt.cm.Blues, edge_color=ews, + edge_vmin=0.0, edge_vmax=1.0) + nx.draw_networkx_labels(Gref, pos, font_size=10) + wsLabels = dict([(p, str(round(w, 1))) for (p, w) in ws.iteritems()]) + nx.draw_networkx_edge_labels(Gref, pos, edge_labels=wsLabels) + + + pc = mpl.collections.PatchCollection(wedges, cmap=plt.cm.Blues) + pc.set_array(ews) + pc.set_clim(vmin=0.0, vmax=1.0) + plt.colorbar(pc) + + ax = plt.gca() +# ax.set_axis_off() + ax.set_facecolor("lightslategray") + plt.show() + + return (Gref, newIntrs) + +def tAllRefData(ref): + d = "../data/organism" + dExprs = "../data/geneExpression" + timepoints = [10, 40, 96, 120, 132] + + for t in timepoints: + print(t) + cells = readDataT(d, dExprs, t) + P, interactions = go(cells, 2, 0.1) + _, interactionsC = go(cells, 2, 1.0) + + Gr, nIntrs = drawRefData(interactions, interactionsC, cells, ref, geneFs) + + fnOutG = "t" + str(t) + "_net.txt" + fnOutN = "t" + str(t) + "_nIntrs.txt" + nx.nx_agraph.write_dot(Gr, fnOutG) + + with open(fnOutN, 'w') as fout: + for i in nIntrs: + fout.write(str(i) + "\n") + + return + +def tAllStatic(s, ref, w0=False): + d = "../data/organism" + dExprs = "../data/geneExpression" + timepoints = [10, 40, 96, 120, 132] + + for t in timepoints: + print(t) + cells = readDataT(d, dExprs, t) + P, interactions = go(cells, 2, s) + + for intr in interactions: + print(intr) + print(intr.w) + + Gr = mkWGraphRef(interactions, ref, cells) + drawWGraph(Gr) + + return + +def tAll(s): + d = "../data/organism" + dExprs = "../data/geneExpression" + timepoints = [10, 40, 96, 120, 132] + + for t in timepoints: + print(t) + cells = readDataT(d, dExprs, t) + P, interactions = go(cells, 2, s) + + G = toGraph(interactions, cells) + + if s == 1.0: drawGraph(G) + else: drawWGraph(G) + + return + +def isFunction(ios): + pass + +# to read specific timepoint (e.g.40) +# cells = readDataT("../data/organism", "../data/geneExpression", 40) +# to read all timepoints +# cells = readData("../data/organism", "../data/geneExpression") +# +# P, interactions = go(cells, 2) #for max number of inputs=2 +# printInteractions(interactions, cells) #print interactions + truth tables for each diff --git a/geneRegulation/boolFs.py b/geneRegulation/boolFs.py new file mode 100644 index 0000000000000000000000000000000000000000..20032fa5bf086ade609c20b54b7897883fad12ff --- /dev/null +++ b/geneRegulation/boolFs.py @@ -0,0 +1,1231 @@ +from __future__ import division +from mpl_toolkits.mplot3d import axes3d +import grn +import analyseExprs as a +import matplotlib.pyplot as plt +import numpy as np +import itertools +import common.seg as seg +from os.path import join +from subprocess import call +from functools import reduce +import os +import math + +geneNms = ['AG', + 'AHP6', + 'ANT', + 'AP1', + 'AP2', + 'AP3', + 'AS1', + 'CLV3', + 'CUC1_2_3', + 'ETTIN', + 'FIL', + 'LFY', + 'MP', + 'PHB_PHV', + 'PI', + 'PUCHI', + 'REV', + 'SEP3', + 'STM', + 'SUP', + 'SVP', + 'WUS'] + +def cellsToOrg(fn, cells): + header = "x y z vol id " + ' '.join(cells[0].geneNms) + with open(fn, 'w+') as fout: + fout.write("\n".join([header] + [cell.toOrganism() for cell in cells])) + + return + +def cellsToDat(fn, cells): + #so we can use newman + nvars = len(cells[0].exprs.keys()) + 5 + ncells = len(cells) + ntpoints = 1 + + header = "\n".join([str(ntpoints), + " ".join([str(ncells), str(nvars), "0"])]) + + with open(fn, 'w+') as fout: + fout.write("\n".join([header] + [cell.toOrganism() for cell in cells])) + + return + +def getFsAll(intrs, cells, f): + gintrs = dict() + for intr in intrs: + out = intr[2] + print(out) + bterms = f(intr, cells) + gintrs[out] = bterms + + return gintrs + +def isFunction(ios): + domain = [i for i, o in ios] + + return len(domain) == len(set(domain)) + +def intToBool(i): + if i == 0: return False + else: return True + +def intsToBools(ns): + return [intToBool(n) for n in ns] + +def infoP(correct, p): + correct = set([i + (o, ) for i, o in correct]) + if tuple(intsToBools(p)) in correct: return " + " + else: return "" + +def acts(f): + (acts, _, _, _, _) = f + + return acts + +def reprs(f): + (_, reprs, _, _, _) = f + + return reprs + +def diffBF(f, f1): + dacts = set.difference(set(acts(f1)), set(acts(f))) + dreprs = set.difference(set(reprs(f1)), set(reprs(f))) + + dnacts = set.difference(set(acts(f)), set(acts(f1))) + dnreprs = set.difference(set(reprs(f)), set(reprs(f1))) + + return (dacts, dreprs, set.union(dnacts, dnreprs)) + +def plotBoolF1(cells, f, f1, axs): + (acts, reprs, out, fsActs, fsReprs) = f1 + (dacts, dreprs) = diffBF(f, f1) + inputs = acts + reprs + intr = (acts, reprs, out) + tt = a.Interaction(inputs, out, 0.0).mkTruthTable(cells) + correct = evalTT(tt, intr, fsActs, fsReprs) + + ninputs = len(inputs) + + ios = np.array([boolsToInt(list(p[0] + (p[1],))) for p in tt]) + ngenes = ios.shape[1] + npats = ios.shape[0] + + ps = a.psGk(cells, list(inputs + (out,))) + score = getScoreMI(tt, ps, intr, fsActs, fsReprs) + percCorrect = getScore(tt, correct, ps) * 100 + + xlabels = list(inputs + (out,)) + ylabels = [infoP(correct, ios[p, :]) + str(round(ps[tuple(ios[p, :])], 2)) + for p in xrange(npats)] + + axs.matshow(ios,cmap=plt.cm.gray_r, alpha = 0.75, aspect='auto') + xaxis = axs.xaxis + yaxis = axs.yaxis + + xlocs = np.array([i for i in xrange(ngenes)]) + ylocs = np.array([i for i in xrange(npats)]) + + xaxis.set_ticks(xlocs + 0.5, minor=True) + xaxis.set(ticks=xlocs, ticklabels=xlabels) + + yaxis.set_ticks(ylocs + 0.5, minor=True) + yaxis.set(ticks=ylocs, ticklabels=ylabels) + + #if not isFunction(tt): axs.set_xlabeel('not a function', color='r') + axs.set_xlabel(bfToString(f) + "\n" + + str(round(percCorrect, 2)) + "\n" + "MI=" + str(round(score, 2)) + "\n" + ) + xaxis.grid(True, which='minor') + yaxis.grid(True, which='minor') + + for xt in axs.get_xticklabels(): + if xt.get_text() in dacts or xt.get_text() in dreprs: + xt.set_color('red') + + return + +def plotBoolF(cells, f, axs): + (acts, reprs, out, fsActs, fsReprs) = f + inputs = acts + reprs + + ninputs = len(inputs) + tt = a.Interaction(inputs, out, 0.0).mkTruthTable(cells) + intr = (acts, reprs, out) + correct = evalTT(tt, intr, fsActs, fsReprs) + + ios = np.array([boolsToInt(list(p[0] + (p[1],))) for p in tt]) + ngenes = ios.shape[1] + npats = ios.shape[0] + + ps = a.psGk(cells, list(inputs + (out,))) + score = getScoreMI(tt, ps, intr, fsActs, fsReprs) + percCorrect = getScore(tt, correct, ps) * 100 + + xlabels = list(inputs + (out,)) + ylabels = [infoP(correct, ios[p, :]) + str(round(ps[tuple(ios[p, :])], 2)) + for p in xrange(npats)] + + + + axs.matshow(ios,cmap=plt.cm.gray_r, alpha = 0.75, aspect='auto') + xaxis = axs.xaxis + yaxis = axs.yaxis + + xlocs = np.array([i for i in xrange(ngenes)]) + ylocs = np.array([i for i in xrange(npats)]) + + xaxis.set_ticks(xlocs + 0.5, minor=True) + xaxis.set(ticks=xlocs, ticklabels=xlabels) + + yaxis.set_ticks(ylocs + 0.5, minor=True) + yaxis.set(ticks=ylocs, ticklabels=ylabels) + + axs.set_xlabel(bfToString(f) + "\n" + + str(round(percCorrect, 2)) + "\n" + "MI=" + str(round(score, 2)) + "\n" + ) + xaxis.grid(True, which='minor') + yaxis.grid(True, which='minor') + + + + return + +def plotBoolF(cells, f, axs): + (acts, reprs, out, fsActs, fsReprs) = f + inputs = acts + reprs + + ninputs = len(inputs) + tt = a.Interaction(inputs, out, 0.0).mkTruthTable(cells) + intr = (acts, reprs, out) + correct = evalTT(tt, intr, fsActs, fsReprs) + + ios = np.array([boolsToInt(list(p[0] + (p[1],))) for p in tt]) + ngenes = ios.shape[1] + npats = ios.shape[0] + + ps = a.psGk(cells, list(inputs + (out,))) + score = getScoreMI(tt, ps, intr, fsActs, fsReprs) + percCorrect = getScore(tt, correct, ps) * 100 + + xlabels = list(inputs + (out,)) + ylabels = [infoP(correct, ios[p, :]) + str(round(ps[tuple(ios[p, :])], 2)) + for p in xrange(npats)] + + axs.matshow(ios,cmap=plt.cm.gray_r, alpha = 0.75, aspect='auto') + xaxis = axs.xaxis + yaxis = axs.yaxis + + xlocs = np.array([i for i in xrange(ngenes)]) + ylocs = np.array([i for i in xrange(npats)]) + + xaxis.set_ticks(xlocs + 0.5, minor=True) + xaxis.set(ticks=xlocs, ticklabels=xlabels) + + yaxis.set_ticks(ylocs + 0.5, minor=True) + yaxis.set(ticks=ylocs, ticklabels=ylabels) + + axs.set_xlabel(bfToString(f) + "\n" + + str(round(percCorrect, 2)) + "\n" + "MI=" + str(round(score, 2)) + "\n" + ) + xaxis.grid(True, which='minor') + yaxis.grid(True, which='minor') + + return + +def plotBoolIntr(cells, intr, axs): + (acts, reprs, out) = intr + inputs = acts + reprs + + intr = a.Interaction(inputs, out, 0.0) + tt = intr.mkTruthTable(cells) + ninputs = len(intr.inputs) + tt = intr.mkTruthTable(cells) + ios = np.array([boolsToInt(list(p[0] + (p[1],))) for p in tt]) + ios = ios[np.argsort(np.sum(ios[:, :ninputs], axis=1)), ] + ngenes = ios.shape[1] + npats = ios.shape[0] + + ps = a.psGk(cells, list(intr.inputs + (intr.out,))) + xlabels = list(intr.inputs + (intr.out, )) + ylabels = [round(ps[tuple(ios[p, :])], 2) for p in xrange(npats)] + + axs.matshow(ios,cmap=plt.cm.gray_r, alpha = 0.75, aspect='auto') + #axs.set_title(intr.out, fontweight='bold') + xaxis = axs.xaxis + yaxis = axs.yaxis + + xlocs = np.array([i for i in xrange(ngenes)]) + ylocs = np.array([i for i in xrange(npats)]) + + xaxis.set_ticks(xlocs + 0.5, minor=True) + xaxis.set(ticks=xlocs, ticklabels=xlabels) + + yaxis.set_ticks(ylocs + 0.5, minor=True) + yaxis.set(ticks=ylocs, ticklabels=ylabels) + + if not isFunction(tt): + print("not a function") + axs.set_xlabel('not a function', color='r') + + if isFunction(tt): + print('function') + axs.set_xlabel('function', color='b') + + + xaxis.grid(True, which='minor') + yaxis.grid(True, which='minor') + + +def lor(a, b): return a or b + +def land(a, b): return a and b + +def boolToInt(b): + if b: return 1 + else: return 0 + +def boolsToInt(bs): + return [boolToInt(b) for b in bs] + +def const2(a, b): return b + +def folds1(vals, fs, iVal): + acc = iVal + for i, v in enumerate(vals): + acc = fs[i](acc, v) + + return acc + +def folds(vals, fs): + fs = (const2, ) + fs + + return folds1(vals, fs, False) + +def evalI(intr, vals, fsAct, fsRepr): + acts = intr[0] + reprs = intr[1] + + nActs = len(acts) + nReprs = len(reprs) + + if vals[nActs:]: + return (folds(vals[:nActs], fsAct) and + not folds(vals[nActs:], fsRepr)) + else: + return folds(vals[:nActs], fsAct) + + +def evalIOr(intr, vals, fsAct, fsRepr): + acts = intr[0] + reprs = intr[1] + + nActs = len(acts) + nReprs = len(reprs) + + if vals[nActs:]: + return (folds(vals[:nActs], fsAct) or + not folds(vals[nActs:], fsRepr)) + else: + return folds(vals[:nActs], fsAct) + + + +def evalTT(tt, intr, fsAct, fsRepr): + correct = [] + for i, o in tt: + if evalI(intr, i, fsAct, fsRepr) == o: + correct.append((i, o)) + + return correct + +def getScore(tt, correct, ps): + acc = 0.0 + for i, o in correct: + acc += ps[tuple(boolsToInt(i + (o, )))] + + return acc + +def getScoreTT(tt, ps, intr, fsAct, fsRepr): + correct = [] + acc = 0.0 + + for i, o in tt: + if evalI(intr, i, fsAct, fsRepr) == o: + correct.append((i, o)) + + for i, o in correct: + acc += ps[tuple(boolsToInt(i + (o, )))] + + return acc + +def hDist(ps, ps1): + s = sum((math.sqrt(p) - math.sqrt(p1))**2 + for p, p1 in zip(ps, ps1)) + + return 1/math.sqrt(2) * math.sqrt(s) + +def klDist(ps, ps1): + if ([1 for p in ps if p == 0] or + [1 for p in ps1 if p == 0]): return 1.0 + + return -sum(p*math.log(p1 / p, 2) for p, p1 in zip(ps, ps1)) + +def log0(x): + if x == 0: return 0.0 + else: return math.log(x, 2) + +def mi(ps, p1, p2): + s = 0.0 + for v in [0, 1]: + for v1 in [0, 1]: + if p1[v] == 0 or p2[v] == 0 or ps[(v, v1)] == 0: + s+= 0 + else: + s += ps[(v, v1)] * log0(ps[(v, v1)] / (p1[v] * p2[v1])) + + return s + +def getScoreMI(tt, ps, intr, fsAct, fsRepr): + joint = dict([((v, v1), 0.0) for v in [0, 1] for v1 in [0, 1]]) + p1 = dict([(v, 0.0) for v in [0, 1]]) + p2 = dict([(v, 0.0) for v in [0, 1]]) + + for i, o in tt: + pred = evalI(intr, i, fsAct, fsRepr) + joint[(boolToInt(o), boolToInt(pred))] += ps[tuple(boolsToInt(i + (o, )))] + p1[boolToInt(o)] += ps[tuple(boolsToInt(i + (o, )))] + p2[boolToInt(pred)] += ps[tuple(boolsToInt(i + (o, )))] + + return mi(joint, p1, p2) + +def tsGetScoreMI(ts1, ts2, g): + joint = np.zeros([2, 2]) + p1 = np.zeros(2) + p2 = np.zeros(2) + + n = len(ts1.cells.keys()) + + for cid in ts1.cells.keys(): + e1 = ts1.cells[cid].exprs[g] + e2 = ts2.cells[cid].exprs[g] + joint[(boolToInt(e1), boolToInt(e2))] += 1 + p1[boolToInt(e1)] += 1 + p2[boolToInt(e2)] += 1 + + + joint = joint / n + p1 = p1 / n + p2 = p2 / n + + return mi(joint, p1, p2) + +def ts_bacc(ts1, ts2, g): + pos = sum([1 for c in ts1 if c.exprs[g]]) + neg = sum([1 for c in ts1 if not c.exprs[g]]) + + tp = 0 + tn = 0 + + tpr = 0.0 + tnr = 0.0 + + for c in ts1: + if c.exprs[g] and ts2.cells[c.cid].exprs[g]: + tp += 1 + + if (not c.exprs[g]) and (not ts2.cells[c.cid].exprs[g]): + tn += 1 + + if not pos ==0: + tpr = tp / pos + else: + tpr = 1.0 + + if not neg == 0: + tnr = tn / neg + else: + tnr = 1.0 + + return 0.5*(tpr + tnr) + +def getScoreBAcc(tt, ps, intr, fsAct, fsRepr): + pos = sum([ps[tuple(boolsToInt(i + (o, )))] for i, o in tt if o]) + neg = sum([ps[tuple(boolsToInt(i + (o, )))] for i, o in tt if not o]) + + tp = 0.0 + tn = 0.0 + + tpr = 0.0 + tnr = 0.0 + + for i, o in tt: + if o and evalI(intr, i, fsAct, fsRepr): + tp += ps[tuple(boolsToInt(i + (o, )))] + + if (not o) and (not evalI(intr, i, fsAct, fsRepr)): + tn += ps[tuple(boolsToInt(i + (o, )))] + + if not pos == 0: + tpr = tp / pos + else: + tpr = 1.0 + + if not neg == 0: + tnr = tn / neg + else: + tnr = 1.0 + + return 0.5*(tpr + tnr) + + +def tsPercCorrect(ts1, ts2, g): + correct = 0 + n = len(ts1.cells.keys()) + + for cid in ts1.cells.keys(): + e1 = ts1.cells[cid].exprs[g] + e2 = ts2.cells[cid].exprs[g] + if e1 == e2: + correct += 1 + + return correct / n + +def getScoreDistr(tt, ps, intr, fsAct, fsRepr): + psR = [0.0, 0.0] + psF = [0.0, 0.0] + + for i, o in tt: + pred = evalI(intr, i, fsAct, fsRepr) + psF[boolToInt(pred)] += ps[tuple(boolsToInt(i + (o, )))] + + for i, o in tt: + psR[boolToInt(o)] += ps[tuple(boolsToInt(i + (o, )))] + + return (1 - klDist(psR, psF)) + +def boolTerms(elems): + #generator for all boolean expression (conjunctions/disjunctions) between elems + nElems = len(elems) + for fs in itertools.product(*itertools.repeat([lor, land], nElems-1)): + for pelem in itertools.permutations(elems): + yield (pelem, fs) + + +def rm_scores(f): + (acts, reprs, out, fsAct, fsRepr, _, _) = f + + return (acts, reprs, out, fsAct, fsRepr) + +def rm_tt(f): + (acts, reprs, out, fsAct, fsRepr, score, _) = f + + return (acts, reprs, out, fsAct, fsRepr, score) + +def checkFs(intr, cells, obj=getScoreBAcc): + acts = intr[0] + reprs = intr[1] + out = intr[2] + + nActs = len(acts) + nReprs = len(reprs) + + fss = [] + for pacts, fsAct in boolTerms(acts): + for preprs, fsRepr in boolTerms(reprs): + inputs = pacts + tuple(preprs) + tt = a.Interaction(inputs, out, 0.0).mkTruthTable(cells) + ps = a.psGk(cells, list(inputs) + [out]) + correct = evalTT(tt, intr, fsAct, fsRepr) + fss.append((pacts, + tuple(preprs), + out, + fsAct, + fsRepr, + obj(tt, ps, intr, fsAct, fsRepr), + correct)) + + return list(map(rm_tt, + sorted(fss, key=lambda x: x[5], reverse=True))) + +def checkFs1(intr, cells): + fs = checkFs(intr, cells) + return fs[0] + ("",) + +def fToString(f): + if 'land' in repr(f): return u'\u2227' + else: return u'\u2228' + +def bfToString(f): + (bacts, breprs, out, fsActs, fsReprs) = f + + factss = [fToString(f) for f in fsActs] + freprss = [fToString(f) for f in fsReprs] + + formula_str = "" + actss = "" + reprss = "" + + if bacts: + actss = "".join([relabelAnnot(val) for pair in zip(bacts, factss) for val in pair] + [bacts[-1]]) + formula_str += actss + + if breprs: + reprss = "".join([relabelAnnot(val) for pair in zip(breprs, freprss) for val in pair] + [breprs[-1]]) + formula_str += ('\n\u00AC' + reprss) + + return formula_str + +def updAct(intr, g): + acts, reprs, out = intr + + return (acts+(g,), reprs, out) + +def updRepr(intr, g): + acts, reprs, out = intr + + return (acts, reprs+(g,), out) + +def remT(t, rel): + return tuple(el for el in t if not el == rel) + +def searchIntrs(intr, cells, geneNms, updIntr): + G = set(geneNms) + I = set(intr[0] + intr[1] + (intr[2], )) + + fs = [] + for g in list(set.difference(G, I)): + acts, reprs, out = intr + intrG = updIntr(intr, g) + + f = checkFs(intrG, cells)[0] + fs.append(f) + + #return max(fs, key=lambda x: x[5]) + return fs + +def searchIntrsNeg(intr, cells): + acts, reprs, out = intr + + fs = [] + #remove acts + for g in acts: + intrG = (remT(acts, g), reprs, out) + f = checkFs(intrG, cells)[0] + fs.append(f) + + #remove reprs + for g in reprs: + intrG = (acts, remT(reprs, g), out) + f = checkFs(intrG, cells)[0] + fs.append(f) + + return fs + +def tinds(t, inds): + return [t[i] for i in inds] + + +def evalB(f, vals): + (acts, reprs, out, fsAct, fsRepr) = f + intr = (acts, reprs, out) + + + return evalI(intr, vals, fsAct, fsRepr) + +def evalB_d(f, actVals, reprVals): + (acts, reprs, out, fsAct, fsRepr) = f + intr = (acts, reprs, out) + + vals = [actVals[act] for act in acts] + [reprVals[repr] for repr in reprs] + + return evalI(intr, vals, fsAct, fsRepr) + +def eq_tt1(f, f1): + #assumes same variables + from itertools import product, repeat + + (acts, reprs, out, fsAct, fsRepr) = f + + nActs = len(acts) + nReprs = len(reprs) + + for vals in list(product(*repeat([0, 1], nActs+nReprs))): + actVals = dict([(act, val) for act, val in zip(acts, vals[:nActs])]) + reprVals = dict([(rpr, val) for rpr, val in zip(reprs, vals[nActs:])]) + + res = evalB_d(f, actVals, reprVals) + res1 = evalB_d(f1, actVals, reprVals) + + if res != res1: return False + + return True + + +def eq_tt(f, f1): + from itertools import product, repeat + fEvals = [] + f1Evals = [] + + nActs = len(acts(f)) + nReprs = len(reprs(f)) + + nActs1 = len(acts(f1)) + nReprs1 = len(reprs(f1)) + + if not nActs == nActs1 or not nReprs == nReprs1: + return False + + for vals in list(product(*repeat([0, 1], nActs+nReprs))): + actVals = vals[:nActs] + reprVals = vals[nActs:] + + valsF = (tinds(actVals, np.argsort(np.argsort(acts(f)))) + + tinds(reprVals, np.argsort(np.argsort(reprs(f))))) + + valsF1 = (tinds(actVals, np.argsort(np.argsort(acts(f1)))) + + tinds(reprVals, np.argsort(np.argsort(reprs(f1))))) + + fEvals.append(evalB(f, valsF)) + f1Evals.append(evalB(f1, valsF1)) + + return fEvals == f1Evals + +def uniq_fs(fs): + ufs = [] + for (i, f) in enumerate(fs): + if not any([eq_tt(uf, f) for uf in ufs]): + ufs.append(f) + + return ufs + +def nb_fs(items, eq=eq_tt1): + nb_items = [] + for i, item in enumerate(items): + print(i) + if any([eq(item, ui) for ui in nb_items]): continue + else: nb_items.append(item) + + return nb_items + +def searchIntrsD1(intr, cells, geneNms=geneNms): + return sorted((searchIntrs(intr, cells, geneNms, updAct) + + searchIntrs(intr, cells, geneNms, updRepr) + + searchIntrsNeg(intr, cells)), + key=lambda x: x[5], reverse=True) + +def rm_score(f): + (acts, reprs, out, fsAct, fsReprs, _) = f + return (acts, reprs, out, fsAct, fsReprs) + + +def score(f): + (_, _, _, _, _, score) = f + return score + +def maxs_fs(fs, n=2, key=score): + from itertools import groupby + + gs = [] + m = groupby(sorted(fs, key=key, reverse=True), key) + for v, g in m: + gs.append(list(g)) + + return [f for f in reduce(lambda x, y: x+y, gs[:n])] + +def max1_fs(fs, key=score, eps=0.1): + m = max(fs, key=key) + + return [f for f in fs if (abs(score(f) - score(m)) / score(m)) < eps] + +def merge_bterms(fsT, g, mf, ts=[10, 40, 96, 120, 132]): + mfsT = [] + for t in ts: + fs1 = list(map(rm_score, mf(fsT[t][g]))) + #fs1 = mf(fsT[t][g]) + mfsT.append(set(fs1)) + + if mfsT: return reduce(set.intersection, mfsT) + else: return set() + +def merge_hyps(fsT, fsTRef, g, mf, ts=[10, 40, 96, 120, 132]): + top_hyps = [] + for t in ts: + top_fs = [rm_score(f) for f in max1_fs(fsT[t][g])] + top_hypsT = set([diffBF(rm_score(fsTRef[t][g][0]), f) for f in top_fs]) + + top_hyps.append(top_hypsT) + + if top_hyps: return reduce(set.intersection, top_hyps) + else: return set() + + +def is_expr(ts, g): + return sum([1 for c in ts if c.exprs[g]]) > 0 + +def ts_expr(tss, g): + return [t for t, ts in tss.items() if is_expr(ts, g)] + +def goSim(fnInit, fnFinal, nsteps): + d = "../data/organism" + dExprs = "../data/geneExpression" + G = grn.GRN("../data/organism/modelFiles/refNet2.txt") + + cells = a.readDataT(d=d, dExprs=dExprs, t=132) + intrs = grn.getInteractionsGenesT(a.geneNms, G) + gintrs = getFs(intrs, cells) + + cellsToOrg(fnInit, cells) + + for i in xrange(nsteps): + cells = updateTissue(gintrs, cells) + + cellsToOrg(fnFinal, cells) + + +def mkFnDat(t): + return "out_t" + str(t) + "_data.data" + +def mkFnRef(t): + return "out_t" + str(t) + "_ref.data" + +def mkFnHyp(t): + return "out_t" + str(t) + "_hyp.data" + +def mkFnRefFs(t): + return "net_" + str(t) + "_ref" + +def mkFnHypFs(t): + return "net_" + str(t) + "_hyp" + +def reprF(f, out): + return out + " = " + bfToString(f) + +def writeFs(fs, fnOut): + with open(fnOut, 'w') as fOut: + fOut.write('\n'.join([reprF(f, g) for g, f in fs.items()])) + + return + +def goSimDatTs(fs, fs1, outDir): + d = "../data/organism" + dExprs = "../data/geneExpression" + timepoints = [132] + G = grn.GRN("../data/organism/modelFiles/refNet2.txt") + intrs = grn.getInteractionsGenesT(a.geneNms, G) + + for t in timepoints: + print(t) + cells = a.readDataT(d=d, dExprs=dExprs, t=t) + + fnDat = join(outDir, mkFnDat(t)) + fnOut = join(outDir, mkFnRef(t)) + fnOut1 = join(outDir, mkFnHyp(t)) + + cellsToDat(fnDat, cells) + cellsToDat(fnOut, updateTissue(fs, cells)) + cellsToDat(fnOut1, updateTissue(fs1, cells)) + + writeFs(fs, join(outDir, mkFnRefFs(t))) + writeFs(fs1, join(outDir, mkFnHypFs(t))) + + return + +def toNewmanFn(fn): + (fn, ext) = os.path.splitext(fn) + return fn + "000" + ".tif" + +def goVisPatternsTs(confFile, outDir, visDir): + timepoints = [132] + visCmd = "/home2/Organism/tools/plot/bin/newman" + mergeCmd = "convert" + + for t in timepoints: + fnDat = join(outDir, mkFnDat(t)) + fnOut = join(outDir, mkFnRef(t)) + fnOut1 = join(outDir, mkFnHyp(t)) + visDirT = join(visDir, "t"+str(t)) + if not os.path.exists(visDirT): + os.makedirs(visDirT) + for i, gene in enumerate(a.geneNms): + print(i, gene) + dataFn = join(visDirT, gene+"_data") + refFn = join(visDirT, gene+"_ref") + hypFn = join(visDirT, gene+"_hyp") + #draw data pattern + call([visCmd, + "-shape", "sphereVolume", + "-d", "3", + fnDat, + "-column", str(i+1), + "-output", "tiff", + dataFn, + "-camera", confFile, + "-min", str(0), + "-max", str(1)]) + + #draw ref data pattern + call([visCmd, + "-shape", "sphereVolume", + "-d", "3", + fnOut, + "-column", str(i+1), + "-output", "tiff", + refFn, + "-camera", confFile, + "-min", str(0), + "-max", str(1)]) + + #draw hyp data pattern + call([visCmd, + "-shape", "sphereVolume", + "-d", "3", + fnOut1, + "-column", str(i+1), + "-output", "tiff", + hypFn, + "-camera", confFile, + "-min", str(0), + "-max", str(1)]) + + #merge + call([mergeCmd, + toNewmanFn(dataFn), + toNewmanFn(refFn), + toNewmanFn(hypFn), + "+append", + "-pointsize", "20", + "-fill", "white", + "-annotate", "+10+40", gene, + join(visDirT, gene+".tif")]) + + #delete individual images + os.remove(toNewmanFn(dataFn)) + os.remove(toNewmanFn(refFn)) + os.remove(toNewmanFn(hypFn)) + + +def goPlotFs(fs): + d = "../data/organism" + dExprs = "../data/geneExpression" + cells = a.readDataT(d=d, dExprs=dExprs, t=132) + + fig, axs = plt.subplots(nrows=4, ncols=5) + axs = axs.reshape(-1) + fig.tight_layout() + + fsR = [f for g, f in fs.items() if acts(f) or reprs(f)] + + for i, f in enumerate(fsR): + plotBoolF(cells, f, axs=axs[i]) + + return + +def goPlotFs1(fs, fs1): + d = "../data/organism" + dExprs = "../data/geneExpression" + cells = a.readDataT(d=d, dExprs=dExprs, t=132) + + fig, axs = plt.subplots(nrows=4, ncols=5) + axs = axs.reshape(-1) + fig.tight_layout() + + fsR = dict([(g, f) for g, f in fs.items() if acts(f) or reprs(f)]) + + for i, gf in enumerate(fsR.items()): + g, f = gf + plotBoolF1(cells, f, fs1[g], axs=axs[i]) + + plt.show() + + return + +def plotPatterns(baseDir): + import os + from subprocess import call + mergeCmd = "convert" + + for f in os.listdir(baseDir): + (fn, ext) = os.path.splitext(f) + if fn.endswith("_in"): + geneNm = fn.split("_")[0] + fn1 = os.path.join(baseDir, f) + fn2 = os.path.join(baseDir, geneNm + "_out.png") + mergedFile = os.path.join(baseDir, geneNm.upper() + ".png") + call([mergeCmd, + fn1, + fn2, + "+append", + "-pointsize", "20", + "-fill", "white", + "-annotate", "+10+40", geneNm.upper(), + mergedFile]) + +def get_best(fsT): + fsT_best = dict() + for t, fs in fsT.items(): + fsT_best[t] = dict() + for g, bs in fs.items(): + fsT_best[t][g] = rm_score(max(fsT[t][g], key=lambda x: x[5])) + + return fsT_best + +def plotTissueExpr(ts, g): + xs = [c.pos.x for c in ts] + ys = [c.pos.y for c in ts] + zs = [c.pos.z for c in ts] + + cs = ['red' if c.exprs[g] else 'blue' for c in ts] + + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.view_init(elev=95, azim=270) + ax.scatter(xs, ys, zs, c=cs, alpha=0.9, s=200, + edgecolors='black', linewidths=0.5) + + plt.show() + +def get_fsT(tss, intrs, tpoints=[10, 40, 96, 120, 132]): + fs = dict() + for t in tpoints: + print(t) + fs[t] = getFsAll(intrs, list(tss[t]), checkFs) + + return fs + + +def get_fsTD1(tss, intrs, tpoints=[10, 40, 96, 120, 132]): + fsD1 = dict() + for t in tpoints: + print(t) + fsD1[t] = getFsAll(intrs, list(tss[t]), searchIntrsD1) + + return fsD1 + + +def getCons(g, fsT, tpoints=[10, 40, 96, 120, 132]): + fsG = [] + for i in range(len(tpoints)): + fsG.append(merge_bterms(fsT, g, max1_fs, + ts=tpoints[(-(i+1)):])) + + fsCons = [(t, fs) for t, fs in zip(tpoints, reversed(fsG)) if fs] + + (t, fs_) = fsCons[0] + + return (t, nb_fs(fs_)) + +def tuplify(xs): + return tuple(tuple(x) for x in xs) + +def merge_hyps(fsT, fsTRef, g, mf, ts=[10, 40, 96, 120, 132]): + top_hyps = [] + for t in ts: + top_fs = [rm_score(f) for f in max1_fs(fsT[t][g])] + top_hypsT = set([tuplify(diffBF(rm_score(fsTRef[t][g][0]), f)) for f in top_fs]) + + top_hyps.append(top_hypsT) + + if top_hyps: return reduce(set.intersection, top_hyps) + else: return set() + +def get_cons_hyps(fsT, fsTRef, g, ts=[10, 40, 96, 120, 132]): + fsG = [] + for i in range(len(ts)): + fsG.append(merge_hyps(fsT, fsTRef, g, max1_fs, + ts=ts[(-(i+1)):])) + + fsCons = [(t, fs) for t, fs in zip(ts, reversed(fsG)) if fs] + + return fsCons[0] + +def relabelAnnot(gn): + if gn == "CUC1_2_3": return "CUC" + elif gn == "AP1_2": return "AP1/2" + elif gn == "PHB_PHV": return "PHB" + elif gn == "ETTIN": return "ETT" + else: return gn + + +def lookupF(fs, f): + for f_ in fs: + if f_[:-1] == f: + return score(f_) + + return None + +def reportFs(g, fsRef, maxToReport=5, tpoints=[10, 40, 96, 120, 132]): + for t in tpoints: + print(t) + fs = max1_fs(fsRef[t][g]) + fs_ = uniq_fs(map(rm_score, fs)) + + for f in fs_[:maxToReport]: + print(bfToString(f)) + print(round(lookupF(fs, f), 3)) + + if len(fs_) - maxToReport > 0: + print("(+"+str(len(fs_) - maxToReport), "others)") + + print("--------") + + + (t, fsCons) = getCons(g, fsRef) + + for f in fsCons: + print(bfToString(f)) + print("(" + ", ".join([str(tp)+"h" for tp in tpoints[tpoints.index(t):]]) + ")") + + return + +def report_fs_csv(g, fsRef, maxToReport=5, tpoints=[10, 40, 96, 120, 132]): + gRow = "\"" + g + "\"" + + for t in tpoints: + fs = max1_fs(fsRef[t][g]) + fs_ = list(map(rm_score, fs)) + + tCell = "\"" + "\n".join([bfToString(f) + "\n" + str(round(lookupF(fs, f), 3)) + for f in fs_[:maxToReport]]) + + if len(fs_) - maxToReport > 0: + tCell += ("\n" + + "(+" + + str(len(fs_) - maxToReport) + + " others)") + + gRow += ("," + tCell + "\"") + + + (t, fsCons) = getCons(g, fsRef) + + summCell = "\"" + for f in fsCons: + summCell += bfToString(f) + summCell += "\n" + ("(" + + ", ".join([str(tp)+"h" for tp in tpoints[tpoints.index(t):]]) + + ")") + "\n" + + summCell += "\"" + gRow += ("," + summCell) + + return gRow + + +def reportFsD1(g, fsRef, fsT, maxToReport=5, tpoints=[10, 40, 96, 120, 132]): + for t in tpoints: + print(t) + fs = max1_fs(fsT[t][g]) + print(len(fs)) + for f in fs[:maxToReport]: + dActs, dReprs = diffBF(rm_score(fsRef[132][g][0]), rm_score(f)) + if dActs: + print(relabelAnnot(list(dActs)[0]), round(score(f), 3)) + elif dReprs: + print("!", relabelAnnot(list(dReprs)[0]), round(score(f), 3)) + print("(+"+str(len(fs) - maxToReport), "others)") + print("--------") + + (t, fsCons) = getCons(g, fsT) + print(t) + for f in fsCons: + dActs, dReprs = diffBF(rm_score(fsRef[132][g][0]), f) + if dActs: + print(relabelAnnot(list(dActs)[0])) + elif dReprs: + print("!", relabelAnnot(list(dReprs)[0])) + + return + +def report_fsD1_csv(g, fsRef, fsT, maxToReport=5, + tpoints=[10, 40, 96, 120, 132]): + gRow = "\"" + g + "\"" + for t in tpoints: + fs = max1_fs(fsT[t][g]) + fs_ = [rm_score(f) for f in fs] + tCell = "\"" + + for f in fs[:maxToReport]: + dActs, dReprs, dns = diffBF(rm_score(fsRef[132][g][0]), rm_score(f)) + if dActs: + tCell += ("\n" + + relabelAnnot(list(dActs)[0]) + + " " + + str(round(score(f), 3))) + elif dReprs: + tCell += ("\n" + + '\u00AC' + + relabelAnnot(list(dReprs)[0]) + + " " + + str(round(score(f), 3))) + elif dns: + tCell += ("\n" + + '--' + + relabelAnnot(list(dns)[0]) + + " " + + str(round(score(f), 3))) + + if len(fs) - maxToReport > 0: + tCell += ("\n" + "(+" + str(len(fs) - maxToReport) + " others)") + + tCell += "\"" + gRow += ("," + tCell) + + (t, hyps) = get_cons_hyps(fsT, fsRef, g) + summCell = "\"" + for hyp in hyps: + dActs, dReprs, dns = hyp + if dActs: + summCell += ("\n" + relabelAnnot(list(dActs)[0])) + elif dReprs: + summCell += ("\n" + '\u00AC' + relabelAnnot(list(dReprs)[0])) + elif dns: + summCell += ("\n" + '--' + relabelAnnot(list(dns)[0])) + + summCell += "\n" + ("(" + + ", ".join([str(tp)+"h" for tp in tpoints[tpoints.index(t):]]) + + ")") + "\n" + + summCell += "\"" + gRow += ("," + summCell) + + return gRow + +def serialise_bf(f): + if 'land' in repr(f): return 'and' + elif 'lor' in repr(f): return 'or' + else: raise ValueError("error") + +def deserialise_bf(f): + if f == 'and': return land + elif f == 'or': return lor + else: raise ValueError("error") + +def serialise_f(bexpr): + return (bexpr[:3] + + (tuple(serialise_bf(f) for f in bexpr[3]), + tuple(serialise_bf(f) for f in bexpr[4])) + + bexpr[5:]) + +def deserialise_f(bexpr): + return (bexpr[:3] + + (tuple(deserialise_bf(f) for f in bexpr[3]), + tuple(deserialise_bf(f) for f in bexpr[4])) + + bexpr[5:]) + +def map_gfs(gfs, sf): + return dict([(g, list(map(sf, fs))) + for g, fs in gfs.items()]) + +def map_fst(fsT, sf): + return dict([(t, map_gfs(gfs, sf)) + for t, gfs in fsT.items()]) + diff --git a/geneRegulation/fig6.ipynb b/geneRegulation/fig6.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ee4598657ba999da973d4a45cdd8a9c7923aab0b --- /dev/null +++ b/geneRegulation/fig6.ipynb @@ -0,0 +1,804 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Figure 6" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "from itertools import repeat\n", + "\n", + "import _pickle as cPickle\n", + "import seaborn as sns\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import lin\n", + "import boolFs as b\n", + "import analyseExprs as a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## C" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class CompMethod(object):\n", + " \n", + " def __init__(self, cmpFn, name):\n", + " self.cmpFn = cmpFn\n", + " self.name = name\n", + " \n", + " def __call__(self, ts, ts1, g):\n", + " return self.cmpFn(ts, ts1, g) \n", + " \n", + "def cmpTss(tss, tss1, gs, cmpFn):\n", + " resT = defaultdict(list)\n", + " \n", + " for t in tss.keys():\n", + " for g in gs[t]:\n", + " resT[t].append((g, cmpFn(tss[t], tss1[t], g)))\n", + " \n", + " return resT\n", + "\n", + "def cmpTssLin(tss, tss1, gs, cmpFn):\n", + " resT = defaultdict(list)\n", + " \n", + " for t in tss.keys():\n", + " for g in gs[t]:\n", + " if lin.prev(t):\n", + " per = (lin.prev(t), t)\n", + " score = cmpFn(tss[t], tss1[per], g)\n", + " \n", + " resT[per].append((g, score))\n", + " \n", + " return resT\n", + "\n", + "def fst(p): return p[0]\n", + "\n", + "def snd(p): return p[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '../data/boolNets/fst_D1_2.pkl'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-3-fc823ea5f605>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mfsT_s\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcPickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfin\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"../data/boolNets/fst_D1_2.pkl\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfin\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mfsT_D1_s\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcPickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfin\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/boolNets/fst_D1_2.pkl'" + ] + } + ], + "source": [ + "with open(\"../data/boolNets/fsT_2.pkl\", \"rb\") as fin:\n", + " fsT_s = cPickle.load(fin)\n", + " \n", + "with open(\"../data/boolNets/fst_D1_2.pkl\", \"rb\") as fin:\n", + " fsT_D1_s = cPickle.load(fin)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import grn\n", + "fsT = b.map_fst(fsT_s, b.deserialise_f)\n", + "fsT_D1 = b.map_fst(fsT_D1_s, b.deserialise_f)\n", + "\n", + "fs = b.get_best(fsT)\n", + "fs1 = b.get_best(fsT_D1)\n", + "\n", + "G = grn.GRN(\"../data/boolNets/refInteractions_analysis_20191216.txt\")\n", + "geneNms = G.getUniqGeneNames()\n", + "\n", + "\n", + "tpoints = [10, 40, 96, 120, 132]\n", + "tss, linss = lin.mkSeries()\n", + "lin.filterL1(tss)\n", + "\n", + "cmpFn = CompMethod(b.ts_bacc, \"BAcc\")\n", + "\n", + "gsReg1 = dict()\n", + "for t in tpoints:\n", + " gsReg1[t] = [g for g in geneNms if b.acts(fs[t][g])]\n", + "\n", + "tssLin = lin.updateTs(tss, linss, tpoints)\n", + "tssRef = lin.updateTsBF(tss, fs)\n", + "tssHyp = lin.updateTsBF(tss, fs1)\n", + "\n", + "\n", + "scoresLinT = cmpTssLin(tss, tssLin, gsReg1, cmpFn)\n", + "scoresRefT = cmpTss(tss, tssRef, gsReg1, cmpFn)\n", + "scoresHypT = cmpTss(tss, tssHyp, gsReg1, cmpFn)\n", + "with open(\"../data/boolNets/scoressRand_new.pkl\", \"rb\") as fRand:\n", + " scoresRandT = cPickle.load(fRand)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.rcParams.update({'font.size': 16})\n", + "fig = plt.figure(figsize=(3, 2.5))\n", + "ax = plt.subplot2grid((1, 1), (0, 0))\n", + "ax.set_xlabel(\"developmental stage\")\n", + "ax.set_ylabel(cmpFn.name)\n", + "cls = sns.color_palette()\n", + "sns.set_style('white')\n", + "\n", + "ax.set_ylim((0.0, 1.1))\n", + "ax.set_yticks([0.0, 0.5, 1.0])\n", + "ax.set_xticks(np.array([0, 1, 2, 3, 4]))\n", + "\n", + "def getVals(tvals):\n", + " xs = []\n", + " ys = []\n", + " ms = []\n", + " for t, vals in tvals.items():\n", + " xs = xs + list(repeat(t, len(vals)))\n", + " ys = ys + list(map(snd, vals))\n", + " ms.append((t, np.mean(list(map(snd, vals)))))\n", + " \n", + " return xs, ys, list(map(snd, sorted(ms, key=lambda x: x[0])))\n", + "\n", + "def getValsT(tvals, tpoints):\n", + " xs = []\n", + " ys = []\n", + " ms = []\n", + " \n", + " for t in tpoints:\n", + " if lin.prev(t):\n", + " xs = xs + list(repeat(t, len(tvals[(lin.prev(t), t)])))\n", + " ys = ys + list(map(snd, tvals[(lin.prev(t), t)]))\n", + " ms.append(np.mean(list(map(snd, tvals[(lin.prev(t), t)]))))\n", + " return xs, ys, ms\n", + "\n", + "\n", + "def toStages(xs):\n", + " tToStage = {10 : 0,\n", + " 40 : 1,\n", + " 96 : 2,\n", + " 120: 3,\n", + " 132: 4}\n", + " \n", + " return [tToStage[x] for x in xs]\n", + " \n", + "xs, ys, ms = getVals(scoresHypT)\n", + "xs1, ys1, ms1 = getVals(scoresRefT)\n", + "xs2, ys2, ms2 = getValsT(scoresLinT, tpoints)\n", + "xs3, ys3, ms3 = getVals(scoresRandT)\n", + "\n", + "sns.lineplot(x=toStages(xs), y=ys, ax=ax, color=cls[0], linewidth=4)\n", + "sns.lineplot(x=toStages(xs1), y=ys1, ax=ax, color=cls[1],linewidth=4)\n", + "sns.lineplot(x=toStages(xs3), y=ys3, ax=ax, color=cls[3], linewidth=4)\n", + "sns.lineplot(x=toStages(xs2), y=ys2, ax=ax, color=cls[2], linewidth=4)\n", + "\n", + "ax.legend(handles=ax.lines, labels=['hyp', 'ref', 'random', 'lin'], loc=3, prop={'size': 12})\n", + "\n", + "plt.savefig(\"figs/fig6/modComp.svg\", dpi=300)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def maxNDs(ss1, ss2, n=3):\n", + " gs = map(fst, ss1)\n", + " \n", + " ds = []\n", + " for g in gs:\n", + " m1 = getA(ss1, g)\n", + " m2 = getA(ss2, g)\n", + " \n", + " ds.append((g, abs(m2 - m1)))\n", + " \n", + " return set(map(fst, sorted(ds, key=lambda x :x[1], reverse=True)[:n]))\n", + "\n", + "def getA(kxs, ks):\n", + " res = [v for k, v in kxs if k == ks]\n", + " \n", + " if res: return res[0]\n", + " else: return None\n", + " \n", + "def relabelAnnot(gn):\n", + " if gn == \"CUC1_2_3\": return \"CUC\"\n", + " elif gn == \"AP1_2\": return \"AP\"\n", + " elif gn == \"PHB_PHV\": return \"PHB\"\n", + " elif gn == \"ETTIN\": return \"ETT\"\n", + " else: return gn\n", + " \n", + "geneFs = {\n", + " 'AUX': 'growth',\n", + " 'AP3': 'identity',\n", + " 'REV': 'polarity',\n", + " 'SUP': 'meristem',\n", + " 'AHP6': 'growth',\n", + " 'PHB_PHV': 'polarity',\n", + " 'SEP1_2': 'identity',\n", + " 'PI': 'identity',\n", + " 'LFY': 'growth',\n", + " 'AS1': 'polarity',\n", + " 'AG': 'identity',\n", + " 'STM' : 'meristem',\n", + " 'AP1_2': 'identity',\n", + " 'ETTIN': 'polarity',\n", + " 'SEP3' : 'identity',\n", + " 'FIL' : 'polarity',\n", + " 'WUS': 'meristem',\n", + " 'PUCHI' : 'growth',\n", + " 'ANT' : 'growth',\n", + " 'CUC1_2_3': 'meristem',\n", + " 'MP' : 'growth',\n", + " 'CLV3': 'meristem',\n", + " 'KAN1': 'polarity',\n", + " 'CLV1': 'meristem'\n", + " }\n", + " \n", + "fColours = {'identity': '#DAE589',\n", + " 'growth': '#3F8BFF',\n", + " 'polarity': '#FFBF6F',\n", + " 'meristem': '#E5738A',\n", + " '': '#FFFFFF'}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ref vs hyp per gene\n", + "\n", + "for t in tpoints:\n", + " plt.rcParams.update({'font.size': 14})\n", + " fig = plt.figure(figsize=(2, 2))\n", + " ax = plt.subplot2grid((1, 1), (0, 0))\n", + " cls = sns.color_palette()\n", + " sns.set_style('white')\n", + " hs = [geneFs.get(g, 'None') for g in list(map(fst, scoresRefT[t]))]\n", + " sns.scatterplot(list(map(snd, scoresRefT[t])), list(map(snd, scoresHypT[t])), hue=hs, ax=ax, s=80, alpha=1.0)\n", + " sns.lineplot(x=[-0.1, 1.1], y=[-0.1, 1.1], ax=ax, linewidth=0.5)\n", + " ax.set_xlim((-0.1, 1.1))\n", + " ax.set_ylim((-0.1, 1.1))\n", + " ax.set_xticks([0.0, 0.5, 1.0])\n", + " ax.set_yticks([0.0, 0.5, 1.0])\n", + " ax.set_xlabel(cmpFn.name + \" (ref)\")\n", + " ax.set_ylabel(cmpFn.name + \" (hyp)\")\n", + " ax.lines[0].set_linestyle(\"--\")\n", + " plt.rcParams.update({'font.size': 11})\n", + " max_ds = maxNDs(scoresRefT[t], scoresHypT[t], n=4)\n", + " for g in gsReg1[t]:\n", + " m1 = getA(scoresRefT[t], g)\n", + " m2 = getA(scoresHypT[t], g)\n", + " if g in max_ds: ax.annotate(relabelAnnot(g), (m1, m2))\n", + " \n", + " fn_out = \"figs/fig6/refHyp_\" + str(t) + \".svg\"\n", + " #plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", + " plt.savefig(fn_out, dpi=300, bbox_inches = 'tight', pad_inches = 0)\n", + " \n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ref vs lin per gene" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "for t in tpoints:\n", + " if lin.prev(t):\n", + " plt.rcParams.update({'font.size': 15})\n", + " fig = plt.figure(figsize=(2, 2))\n", + " ax = plt.subplot2grid((1, 1), (0, 0))\n", + " cls = sns.color_palette()\n", + " sns.set_style('white')\n", + " hs = [geneFs.get(g, 'None') for g in list(map(fst, scoresRefT[t]))]\n", + " sns.scatterplot(list(map(snd, scoresRefT[t])), list(map(snd, scoresLinT[(lin.prev(t), t)])), ax=ax, s=80, hue=hs, palette=fColours)\n", + " sns.lineplot(x=[-0.1, 1.1], y=[-0.1, 1.1], ax=ax, linewidth=0.5)\n", + " ax.set_xlim((-0.1, 1.1))\n", + " ax.set_ylim((-0.1, 1.1))\n", + " ax.set_xticks([0.0, 0.5, 1.0])\n", + " ax.set_yticks([0.0, 0.5, 1.0])\n", + " ax.set_xlabel(cmpFn.name + \" (ref)\")\n", + " ax.set_ylabel(cmpFn.name + \" (lin)\")\n", + " ax.lines[0].set_linestyle(\"--\")\n", + " plt.rcParams.update({'font.size': 11})\n", + " max_ds = maxNDs(scoresRefT[t], scoresLinT[(lin.prev(t), t)], 4)\n", + " for g in gsReg1[t]:\n", + " m1 = getA(scoresRefT[t], g)\n", + " m2 = getA(scoresLinT[(lin.prev(t), t)], g)\n", + " if g in max_ds: ax.annotate(relabelAnnot(g), (m1, m2))\n", + " \n", + " #plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", + " plt.savefig(\"figs/fig6/refLin_\" + str(t) + \".svg\", dpi=300)\n", + " \n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def print_exprs(fs, scores, genes):\n", + " for g in genes:\n", + " print(g)\n", + " print(b.bfToString(fs[g]))\n", + " print(getA(scores, g))\n", + " print()\n", + " \n", + " return\n", + "\n", + "def print_scores(scores, genes):\n", + " for g in genes:\n", + " print(g)\n", + " print(getA(scores, g))\n", + " print()\n", + " \n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vGenes = ['AG', 'AP1_2', 'STM', 'PHB_PHV', 'AHP6', 'ETTIN']\n", + "print_exprs(fs[132], scoresRefT[132], vGenes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_exprs(fs1[132], scoresHypT[132], vGenes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_scores(scoresLinT[(120, 132)], vGenes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import visTs as v\n", + "vGenes = ['AG', 'AP1', 'STM', 'PHB_PHV', 'AHP6', 'ETTIN']\n", + "v.plot_ts([tss[132], tssLin[(120, 132)], tssRef[132], tssHyp[132]], vGenes, [\"data\", \"lineage\", \"ref\", \"hyp\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ts, lb in zip([tss, tssLin, tssRef, tssHyp], [\"data\", \"lineage\", \"ref\", \"hyp\"]):\n", + " print(len(list(ts)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def getA(kxs, ks):\n", + " res = [v for k, v in kxs if k == ks]\n", + " \n", + " if res: return res[0]\n", + " else: return None \n", + "\n", + "scoress = []\n", + "scoresLinT2 = dict([(t2, ss) for (t1, t2), ss in scoresLinT.items()])\n", + "\n", + "scoresLinT2[10] = [(g, 0.0) for g in gsReg1[10]]\n", + "\n", + "for g in gsReg1[132]:\n", + " gss = [] \n", + " for ms in [scoresLinT2, scoresRandT, scoresRefT, scoresHypT]:\n", + " gss += [getA(ms[t], g) for t in tpoints]\n", + " scoress.append(gss) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gsReg1[10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.rcParams.update({'font.size': 11})\n", + "fig = plt.figure(figsize=(4, 3.5))\n", + "ax = plt.gca() \n", + "im = ax.imshow(np.array(scoress), interpolation='none',aspect='auto',cmap='coolwarm',origin='lower',vmin=0.0, vmax=1.0) \n", + "timeListLab = ['0','1','2','3','4','0','1','2','3','4','0','1','2','3','4','0','1'\n", + ",'2','3','4'] \n", + " \n", + "plt.colorbar(im,ax=ax, shrink=0.5, ticks=[0.0, 0.5, 1.0])\n", + "\n", + "\n", + "ax.set_xticks(np.arange(len(timeListLab)))\n", + "ax.set_yticks(np.arange(len(gsReg1[132])))\n", + " \n", + "ax.set_xticklabels(timeListLab)\n", + "ax.set_yticklabels(map(relabelAnnot, gsReg1[132]))\n", + " \n", + "ax.set_xlabel('Stage')\n", + "ax.set_ylabel('Gene')\n", + " \n", + "plt.setp(ax.get_xticklabels(), rotation=0, ha=\"right\",\n", + " rotation_mode=\"anchor\")\n", + " \n", + "plt.vlines([4.5,9.5,14.5],-0.5, 16.5,linewidth=3,color='w') \n", + "plt.vlines(np.arange(20)-0.5, -0.5, 16.5, linewidth=0.3,color='w')\n", + "plt.hlines(np.arange(17)-0.5, -0.5,19.5, linewidth=0.3,color='w')\n", + "\n", + "plt.savefig(\"figs/fig6/modCompGenes.png\", dpi=300)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoress[16]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoresRefT[40]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Supp Tables - S1, S2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gsReg = gsReg1[132]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gsReg1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib\n", + "importlib.reload(b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"rules.csv\", \"w\") as fout: \n", + " ss = []\n", + " for g in sorted(gsReg):\n", + " print(g)\n", + " ss.append(b.report_fs_csv(g, fsT))\n", + " \n", + " fout.write(\"\\n\".join(ss))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs = b.merge_bterms(fsT, 'AP1_2', b.max1_fs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def nb_fs(items, eq=b.eq_tt):\n", + " nb_items = []\n", + " for i, item in enumerate(items):\n", + " print(i)\n", + " if any([eq(item, ui) for ui in nb_items]): continue\n", + " else: nb_items.append(item)\n", + " \n", + " return nb_items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%prun nb_fs(fs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit b.eq_tt(b.rm_score(fsT[132]['AP1_2'][0]), b.rm_score(fsT[132]['AP1_2'][1]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(fs_u)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fsT[132]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"hyp.csv\", \"w\") as fout: \n", + " fout.write(\"\\n\".join([b.report_fsD1_csv(g, fsT, fsT_D1) \n", + " for g in sorted(gsReg)]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Statistical tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rand_scores_ts = np.concatenate([list(map(snd, scoresRandT[t])) for t in tpoints])\n", + "ref_scores_ts = np.concatenate([list(map(snd, scoresRefT[t])) for t in tpoints])\n", + "ref_scores_ts_40 = np.concatenate([list(map(snd, scoresRefT[t])) for t in tpoints[1:]])\n", + "lin_scores_ts = np.concatenate([list(map(snd, scoresLinT[(lin.prev(t), t)])) for t in tpoints if lin.prev(t)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import ttest_rel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in tpoints:\n", + " if lin.prev(t):\n", + " print(t)\n", + " (t, pval) = ttest_rel(list(map(snd, scoresRefT[t])), list(map(snd, scoresLinT[(lin.prev(t), t)])))\n", + " print(round(pval, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in tpoints:\n", + " print(t)\n", + " (t, pval) = ttest_rel(list(map(snd, scoresRefT[t])), list(map(snd, scoresHypT[t])))\n", + " print(round(pval, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in tpoints:\n", + " print(t)\n", + " (t, pval) = ttest_rel(list(map(snd, scoresRefT[t])), list(map(snd, scoresRandT[t])))\n", + " print(round(pval, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def nGreaterThan(xs, v): return sum([1 for x in xs if x > v])\n", + "\n", + "def pGreaterThan(xs, v): return nGreaterThan(xs, v) / len(xs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pGreaterThan(list(map(snd, scoresLinT[(10, 40)])), 0.75)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pGreaterThan(list(map(snd, scoresLinT[(120, 132)])), 0.75)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pGreaterThan(list(map(snd, scoresLinT[(96, 120)])), 0.75)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoresRefT[132]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoresLinT[(120, 132)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/geneRegulation/grn.py b/geneRegulation/grn.py new file mode 100644 index 0000000000000000000000000000000000000000..ae165572cd4333df60a324a5319b7b9a07ba0101 --- /dev/null +++ b/geneRegulation/grn.py @@ -0,0 +1,210 @@ +class Act(object): + def __init__(self): + pass + + def __repr__(self): + return "-->" + +class Repr(object): + def __init__(self): + pass + + def __repr__(self): + return "--|" + +class Interaction(object): + def __init__(self, src, dest, op): + self.src = src + self.dest = dest + self.op = op + + def __repr__(self): + return (self.src + + " " + repr(self.op) + " " + + self.dest) + + def __hash__(self): + return hash((self.src, repr(self.op), self.dest)) + + def __eq__(self, other): + return (self.src == other.src and + self.dest == other.dest and + repr(self.op) == repr(other.op)) + + def isAct(self): + return repr(self.op) == "-->" + + def isRepr(self): + return repr(self.op) == "--|" + + def rename(self, g, ng): + if self.src == g: self.src = ng + if self.dest == g: self.dest = ng + + return + + def isLoop(self): + return self.src == self.dest + + +class GRN(object): + + def __init__(self, fn): + self.interactions = self.parseGRN(fn) + return + + def _parseAct(self, ln): + genes = ln.split("-->") + if len(genes) == 2: + src = genes[0].strip() + dest = genes[1].strip() + return Interaction(src, dest, Act()) + else: + return None + + def _parseRepr(self, ln): + genes = ln.split("--|") + if len(genes) == 2: + src = genes[0].strip() + dest = genes[1].strip() + return Interaction(src, dest, Repr()) + else: + return None + + def _parseInteraction(self, ln): + actIntr = self._parseAct(ln) + reprIntr = self._parseRepr(ln) + + if actIntr: + return actIntr + elif reprIntr: + return reprIntr + else: + raise Exception("parse error") + + def parseGRN(self, fn): + interactions = [] + with open(fn, 'r') as f: + for line in f: + intr = self._parseInteraction(line) + interactions.append(intr) + + return interactions + + def toPairs(self): + return [(intr.src, intr.dest) for intr in self.interactions] + + + def getInteractions(self, geneNm): + return [intr for intr in self.interactions if geneNm in intr.dest] + + def filterInteractions(self, geneNms): + self.interactions = [intr for intr in self.interactions + if intr.src in geneNms and intr.dest in geneNms] + return + + def noDups(self): + self.interactions = list(set(self.interactions)) + + def renameGene(self, g, ng): + for intr in self.interactions: + intr.rename(g, ng) + + def diff(self, other): + return list(set.difference(self.interactions, other.interactions)) + + def getUniqGeneNames(self): + from functools import reduce + + genePairs = [[intr.src, intr.dest] for intr in self.interactions] + + return set(reduce(lambda x, y:x+y, genePairs)) + + +def mkDegr(): + degrRepr = "degradationOne 1 0" + degrRepr += "\n" + degrRepr += "0.1" + + return degrRepr + +def mkHill(idxs, interactions): + #interactions with same dest + nActs = sum([1 for intr in interactions if intr.isAct()]) + nReprs = sum([1 for intr in interactions if intr.isRepr()]) + vMax = 0.1 + K = 0.5 + n = 2 + nParams = (nActs + nReprs)*2 + 1 + nLevels = 2 + + hillRepr = " ".join(["hill", str(nParams), str(nLevels), str(nActs), str(nReprs)]) + hillRepr += "\n" + hillRepr += str(vMax) + hillRepr += "\n" + + for i in range(len(interactions)): + hillRepr += "\n".join([str(K), str(n)]) + hillRepr += "\n" + + + for intr in interactions: + hillRepr += str(idxs[intr.src]) + hillRepr += (" #" + repr(intr)) + hillRepr += "\n" + + return hillRepr + +def mkNet(geneNms, gn): + sgenes = set(geneNms) + idxs = dict([(g, i+5) for i, g in enumerate(geneNms)]) + netRepr = "" + + for g in geneNms: + gIntrs = [gi for gi in gn.getInteractions(g) if gi.src in sgenes] + gIntrsF = [] + for gi in gIntrs: + if gi.isAct(): gIntrsF.append((gi, 1)) + else: gIntrsF.append((gi, 2)) + + gIntrsSorted = map(lambda x: x[0], sorted(gIntrsF, key=lambda x: x[1])) + + if not gIntrsSorted: + netRepr += " ".join([g, str(idxs[g]), "0"]) + else: + netRepr += " ".join([g, str(idxs[g]), "2"]) + netRepr += "\n" + netRepr += mkHill(idxs, gIntrsSorted) + netRepr += "\n" + netRepr += mkDegr() + + netRepr += "\n" + netRepr += "\n" + + return netRepr + + +def getInteractionsGenes(geneNms, gn): + sgenes = set(geneNms) + + allIntrs = [] + + for g in geneNms: + inputs = tuple([gi.src for gi in gn.getInteractions(g) if gi.src in sgenes]) + allIntrs.append((inputs, g)) + + return allIntrs + +def getInteractionsGenesT(geneNms, gn): + sgenes = set(geneNms) + + allIntrs = [] + + for g in geneNms: + acts = tuple([gi.src for gi in gn.getInteractions(g) if gi.src in sgenes and gi.isAct()]) + reprs = tuple([gi.src for gi in gn.getInteractions(g) if gi.src in sgenes and gi.isRepr()]) + + allIntrs.append((acts, reprs, g)) + + return allIntrs + diff --git a/geneRegulation/random_net.ipynb b/geneRegulation/random_net.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..99af55a0fc728f620dac1583a6569c663116372f --- /dev/null +++ b/geneRegulation/random_net.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import print_function\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import _pickle as cPickle\n", + "import grn\n", + "import lin\n", + "import boolFs as b\n", + "from mpl_toolkits.mplot3d import Axes3D\n", + "import analyseExprs as a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "G = grn.GRN(\"../data/boolNets/refInteractions_analysis_20191216.txt\")\n", + "geneNms = G.getUniqGeneNames()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"../data/boolNets/fsT_BAcc_new.pkl\", \"rb\") as fin:\n", + " fsT_s = cPickle.load(fin)\n", + " \n", + "fsT = b.map_fst(fsT_s, b.deserialise_f)\n", + "fs = b.get_best(fsT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "intrs = grn.getInteractionsGenesT(geneNms, G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def snd(x): return x[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def mk_random_intrs(intrs):\n", + " #given a skeleton replace inputs with random inputs\n", + " all_genes = set(map(lambda x: x[2], intrs))\n", + " rand_intrs = []\n", + " for intr in intrs:\n", + " acts, reprs, out = intr\n", + " gs = list(all_genes.difference(set([out])))\n", + " nacts = tuple(np.random.choice(gs, len(acts), replace=False))\n", + " nreprs = tuple(np.random.choice(list(set.difference(set(gs), set(nacts))), \n", + " len(reprs), \n", + " replace=False))\n", + " rand_intrs.append((nacts, nreprs, out))\n", + " \n", + " return rand_intrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def randomF(intr):\n", + " acts, reprs, out = intr\n", + " nActs = len(acts)\n", + " nReprs = len(reprs)\n", + " fActs = []\n", + " fReprs = []\n", + " \n", + " fs = [b.lor, b.land]\n", + " \n", + " if acts:\n", + " fActs = np.random.choice(fs, nActs-1)\n", + " \n", + " if reprs:\n", + " fReprs = np.random.choice(fs, nReprs-1)\n", + " \n", + " return (acts, reprs, out, tuple(fActs), tuple(fReprs))\n", + "\n", + "def randomFs(intrs):\n", + " gintrs = dict()\n", + " for intr in intrs:\n", + " _, _, out = intr\n", + " gintrs[out] = randomF(intr)\n", + " \n", + " return gintrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def random_net(intrs, tpoints=[10, 40, 96, 120, 132]):\n", + " random_intrs = mk_random_intrs(intrs)\n", + " fs = {}\n", + " for t in tpoints:\n", + " #cells = a.readDataT(d=d, dExprs=dExprs, t=t)\n", + " fs[t] = randomFs(random_intrs)\n", + " #fs[t] = b.getFs(cells=cells, intrs=random_intrs, f=b.checkFs1)\n", + " \n", + " return fs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "randomFs(mk_random_intrs(intrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "def get_expr_locs(ts, g):\n", + " return np.array([(c.pos.x, c.pos.y) for c in ts if c.exprs[g]])\n", + "\n", + "def get_genes(gsReg, tss):\n", + " gss = defaultdict(list)\n", + " for t in tss.keys():\n", + " for g in gsReg:\n", + " if get_expr_locs(tss[t], g).size != 0:\n", + " gss[t].append(g)\n", + " \n", + " return gss\n", + "\n", + "def cmpTss(tss, tss1, gs, cmpFn):\n", + " resT = defaultdict(list)\n", + " \n", + " for t in tss.keys():\n", + " for g in gs[t]:\n", + " resT[t].append((g, cmpFn(tss[t], tss1[t], g)))\n", + " \n", + " return resT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geneNms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tss, linss = lin.mkSeries()\n", + "lin.filterL1(tss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tss[132].cells[120].exprs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gsReg1 = dict()\n", + "for t in [10, 40, 96, 120, 132]:\n", + " gsReg1[t] = [g for g in geneNms if b.acts(fs[t][g])]\n", + "cmpFn = b.ts_bacc\n", + "\n", + "n=100\n", + "scoress = []\n", + "for i in range(n):\n", + " print(i)\n", + " fs = random_net(intrs)\n", + " tssRand = lin.updateTsBF(fs=fs, tss=tss)\n", + " scoresRand = cmpTss(tss, tssRand, gsReg1, cmpFn)\n", + " scoress.append(scoresRand)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs = random_net(intrs)\n", + "tssRand = lin.updateTsBF(fs=fs, tss=tss)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def fst(x): return x[0]\n", + "\n", + "def getA(kxs, ks):\n", + " res = [v for k, v in kxs if k == ks]\n", + " \n", + " if res: return res[0]\n", + " else: return None\n", + "\n", + "def add(s1, s2):\n", + " s3 = defaultdict(list)\n", + " for t in s1.keys():\n", + " for g in map(fst, s1[t]):\n", + " s3[t].append((g, getA(s1[t], g) + getA(s2[t], g)))\n", + " \n", + " return s3\n", + "\n", + "def div(s, d):\n", + " sd = defaultdict(list)\n", + " for t in s.keys():\n", + " for g in map(fst, s[t]):\n", + " sd[t].append((g, getA(s[t], g) / d))\n", + " \n", + " return sd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from functools import reduce\n", + "scoress_ = div(reduce(add, scoress), 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoress_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"scoressRand_new.pkl\", \"wb\") as fout:\n", + " cPickle.dump(scoress_, fout)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}