Source code for vuecore.wgcna

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.subplots as tools
import scipy as scp
from acore import wgcna_analysis

from . import color_list, dendrogram


def get_module_color_annotation(
    map_list,
    col_annotation=False,
    row_annotation=False,
    bygene=False,
    module_colors=None,
    dendrogram=None,
):
    """
    Convert a list of labels into colors and build Plotly heatmap traces
    that can be used as color annotations.

    Options module_colors and dendrogram only apply when map_list is a list of
    experimental features used in module eigengenes calculation
    (``bygene=True``).

    :param list map_list: dendrogram leaf labels, or co-expression module
        names (e.g. ``MEblue``) when ``bygene`` is False.
    :param bool col_annotation: if True, adds color annotations as a row.
    :param bool row_annotation: if True, adds color annotations as a column.
    :param bool bygene: determines whether annotation colors have to be
        reordered to match dendrogram leaf labels.
    :param list module_colors: dendrogram leaf module color (only read when
        ``bygene`` is True).
    :param dict dendrogram: dendrogram represented as a plotly object figure
        (only read when ``bygene`` is True).
    :return: a ``(row_trace, column_trace)`` tuple when both annotations are
        requested, a single heatmap trace when only one is requested, and
        None when neither is requested.

    .. note:: map_list and module_colors must have the same length.
    """
    colors_dict = color_list.make_color_dict()

    # Every label gets an evenly spaced position in [0, 1]; that position is
    # both the heatmap value and the colorscale stop of the label's color.
    # A single label is placed at 0 (the original divided by zero here).
    n_labels = len(map_list)
    step = 1 / (n_labels - 1) if n_labels > 1 else 0
    positions = [round(idx * step, 4) for idx in range(n_labels)]

    if bygene:
        # Color annotation for the experimental features of a dendrogram.
        if module_colors is None:
            module_colors = []
        normalized = [name.lower().replace(" ", "") for name in module_colors]
        gene_colors = dict(zip(map_list, normalized))
        label_colors = [colors_dict[gene_colors[label]] for label in map_list]
        # Rows follow the dendrogram leaf order, not the map_list order.
        labels = list(dendrogram["layout"]["xaxis"]["ticktext"])
    else:
        # map_list holds co-expression module names such as "MEblue"; any
        # label that does not split into exactly two parts on "ME" is white.
        label_colors = []
        for label in map_list:
            parts = label.split("ME")
            if len(parts) == 2:
                label_colors.append(colors_dict[parts[1]])
            else:
                label_colors.append("#ffffff")
        labels = map_list

    colors = [[pos, color] for pos, color in zip(positions, label_colors)]
    if n_labels == 1:
        # Give the colorscale a stop at 1 as well as at 0.
        colors.append([1, label_colors[0]])
    label_to_position = dict(zip(map_list, positions))

    df = pd.DataFrame([labels, [1] * len(labels)], index=["labels", "y"]).T
    df["vals"] = df["labels"].map(label_to_position)

    def row_trace(xaxis, yaxis):
        # Vertical strip: labels along y.
        return go.Heatmap(
            z=df.vals,
            x=df.y,
            y=df.labels,
            showscale=False,
            colorscale=colors,
            xaxis=xaxis,
            yaxis=yaxis,
        )

    def col_trace(xaxis, yaxis):
        # Horizontal strip: labels along x.
        return go.Heatmap(
            z=df.vals,
            x=df.labels,
            y=df.y,
            showscale=False,
            colorscale=colors,
            xaxis=xaxis,
            yaxis=yaxis,
        )

    if row_annotation and col_annotation:
        return row_trace("x", "y"), col_trace("x2", "y2")
    if row_annotation:
        return row_trace("x2", "y2")
    if col_annotation:
        return col_trace("x2", "y2")
    return None
def get_heatmap(df, colorscale=None, color_missing=True):
    """
    Build a simple Plotly heatmap figure dictionary from a dataframe.

    :param df: pandas dataframe containing experimental data, with
        samples/subjects as rows and features as columns.
    :param list[list] colorscale: heatmap colorscale
        (e.g. [[0,'#67a9cf'],[0.5,'#f7f7f7'],[1,'#ef8a62']]). When empty or
        not given, [[0, 'rgb(255,255,255)'], [1, 'rgb(255,51,0)']] is used.
    :param bool color_missing: if set to True, a second grey heatmap built
        from ``wgcna_analysis.get_miss_values_df(df)`` is added on top.
    :return: dictionary with 'layout' and 'data' keys, or an empty
        dictionary when df is None.
    """
    if df is None:
        return {}

    palette = colorscale or [[0, "rgb(255,255,255)"], [1, "rgb(255,51,0)"]]

    traces = [
        go.Heatmap(
            z=df.values.tolist(),
            y=list(df.index),
            x=list(df.columns),
            colorscale=palette,
            showscale=True,
            colorbar=dict(
                x=1, y=0, xanchor="left", yanchor="bottom", len=0.35, thickness=15
            ),
        )
    ]

    if color_missing:
        # Uniform grey layer drawn over the main heatmap.
        missing = wgcna_analysis.get_miss_values_df(df)
        grey = [[0, "rgb(201,201,201)"], [1, "rgb(201,201,201)"]]
        traces.append(
            go.Heatmap(
                z=missing.values.tolist(),
                y=list(df.index),
                x=list(df.columns),
                colorscale=grey,
                showscale=False,
            )
        )

    return {"layout": {"template": "plotly_white"}, "data": traces}
def plot_labeled_heatmap(
    df,
    textmatrix,
    title,
    colorscale=None,
    width=1200,
    height=800,
    row_annotation=False,
    col_annotation=False,
):
    """
    Plot a simple Plotly heatmap with column and/or row color annotations
    and a text annotation in every cell.

    :param df: pandas dataframe containing data to be plotted in the heatmap.
    :param textmatrix: pandas dataframe with heatmap annotations as values.
    :param str title: the title of the figure.
    :param list[list] colorscale: heatmap colorscale. Defaults to
        [[0,'rgb(0,255,0)'],[0.5,'rgb(255,255,255)'],[1,'rgb(255,0,0)']].
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :param bool row_annotation: if True, adds a color-coded column at the
        left of the heatmap.
    :param bool col_annotation: if True, adds a color-coded row at the
        bottom of the heatmap.
    :return: figure dictionary with 'data' and 'layout' keys, or an empty
        dictionary when df is None.
    """
    if df is None:
        return {}

    if colorscale is None:
        colorscale = [
            [0, "rgb(0,255,0)"],
            [0.5, "rgb(255,255,255)"],
            [1, "rgb(255,0,0)"],
        ]

    figure = get_heatmap(df, colorscale=colorscale, color_missing=False)

    # get_module_color_annotation returns None (no annotation requested), a
    # single trace, or a (row, column) tuple. Only real traces may be added
    # to the figure data.
    annotation_traces = get_module_color_annotation(
        list(df.index),
        row_annotation=row_annotation,
        col_annotation=col_annotation,
        bygene=False,
    )
    if isinstance(annotation_traces, tuple):
        figure["data"].extend(annotation_traces)
    elif annotation_traces is not None:
        figure["data"].append(annotation_traces)

    # One text annotation per cell, positioned by the labels of df.
    cell_text = textmatrix.values
    annotations = [
        go.layout.Annotation(
            text=str(val),
            font=dict(size=8),
            x=df.columns[m],
            y=df.index[n],
            xref="x",
            yref="y",
            showarrow=False,
        )
        for n, row in enumerate(cell_text)
        for m, val in enumerate(row)
    ]

    layout = go.Layout(
        width=width,
        height=height,
        title=title,
        xaxis=dict(
            domain=[0.015, 1],
            autorange=True,
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks="",
            showticklabels=True,
            automargin=True,
            anchor="y",
        ),
        yaxis=dict(
            autorange="reversed",
            ticklen=5,
            ticks="outside",
            tickcolor="white",
            showticklabels=False,
            automargin=True,
            showgrid=False,
            anchor="x",
        ),
        # Narrow strip at the left used by the color annotation trace.
        xaxis2=dict(
            domain=[0, 0.01],
            autorange=True,
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks="",
            showticklabels=False,
            automargin=True,
            anchor="y2",
        ),
        yaxis2=dict(
            autorange="reversed",
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks="",
            showticklabels=True,
            automargin=True,
            anchor="x2",
        ),
    )

    figure["layout"] = layout
    figure["layout"]["template"] = "plotly_white"
    figure["layout"].update(annotations=annotations)

    return figure
def plot_dendrogram_guidelines(Z_tree, dendrogram):
    """
    Create the shapes for vertical dotted guidelines across a dendrogram.

    The x-axis tick positions of the dendrogram figure are sampled at
    roughly every eighth of their count (skipping the first sample), and one
    line is drawn at each sampled position from y=0.3 up to the maximum
    value found in ``Z_tree["dcoord"]``.

    :param dict Z_tree: dictionary of data structures computed to render the
        dendrogram. Only the 'dcoord' key is read here.
    :param dendrogram: dendrogram represented as a plotly object figure
        (its ``layout.xaxis.tickvals`` are read).
    :return: List of shape dictionaries (empty when dendrogram is None or
        there is no tick position to draw a guideline at).
    """
    if dendrogram is None:
        return []

    tickvals = list(dendrogram["layout"]["xaxis"]["tickvals"])
    # With fewer than 8 ticks the integer step would be 0, which is not a
    # valid slice step; fall back to one guideline per tick.
    step = max(1, len(tickvals) // 8)
    positions = tickvals[step::step]
    if not positions:
        return []

    top = np.max(Z_tree["dcoord"])
    line = {"color": "rgb(192,192,192)", "width": 0.1, "dash": "dot"}

    return [
        {"type": "line", "x0": x, "y0": 0.3, "x1": x, "y1": top, "line": line}
        for x in positions
    ]
def plot_intramodular_correlation(
    MM, FS, feature_module_df, title, width=1000, height=800
):
    """
    Use the Feature Significance and Module Membership measures to plot a
    grid of scatter plots: one row per clinical trait (column of FS) and one
    column per module (``MM*`` column of MM). Each panel shows the absolute
    membership against the absolute significance for the features assigned
    to that module, with a linear regression line and its R and p-value.

    :param MM: pandas dataframe with module membership data (features as
        index, columns named ``MM<color>``).
    :param FS: pandas dataframe with feature significance data (features as
        index, one column per trait).
    :param feature_module_df: pandas DataFrame of experimental features and
        module colors, with 'name' and 'modColor' columns
        (use mode='dataframe' in get_FeaturesPerModule).
    :param str title: plot title
    :param int width: plot width
    :param int height: plot height
    :return: Plotly object figure, or an empty dictionary when MM is None or
        has no ``MM*`` column.

    Example::

        plot = plot_intramodular_correlation(MM, FS, feature_module_df, title='Plot', width=1000, height=800)

    .. note:: There is a limit in the number of subplots one can make in
        Plotly. This function limits the number of modules shown to 5.
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that the
    # `scipy.stats` submodule is loaded.
    from scipy import stats

    if MM is None:
        return {}

    # Keep at most the last 5 module-membership columns. The original code
    # used `MM.iloc[:, -6]`, which selects one column as a Series and breaks
    # every later `.columns` access.
    # NOTE(review): "last 5" follows the docstring limit - confirm intent.
    max_modules = 5
    module_cols = [
        col for col in MM.columns if isinstance(col, str) and col.startswith("MM")
    ][-max_modules:]
    if not module_cols:
        return {}

    # Work on a copy so the caller's dataframe is not modified.
    MM = MM[module_cols].copy()
    MM["modColor"] = MM.index.map(
        feature_module_df.set_index("name")["modColor"].get
    )

    n_rows = len(FS.columns)
    n_cols = len(module_cols)

    figure = tools.make_subplots(
        rows=n_rows,
        cols=n_cols,
        shared_xaxes=False,
        shared_yaxes=False,
        vertical_spacing=0.015,
        horizontal_spacing=0.1,
        print_grid=False,
    )
    figure.layout.template = "plotly_white"
    figure.layout.update(
        dict(width=width, height=height, showlegend=False, title=title)
    )

    # Subplot axes are numbered row by row starting at 1. Module names go
    # on the x axes of the bottom row, trait names on the y axes of the
    # first column.
    axis_dict = {}
    first_bottom_axis = (n_rows - 1) * n_cols + 1
    for i, col in enumerate(module_cols):
        axis_dict["xaxis{}".format(first_bottom_axis + i)] = dict(
            title=dict(text=col, font=dict(size=13))
        )
    for a, trait in enumerate(FS.columns):
        # Only the first three words of a trait name are shown, one per line.
        label = "<br>".join(trait.split(" ")[:3])
        axis_dict["yaxis{}".format(a * n_cols + 1)] = dict(
            title=dict(text=label, font=dict(size=13))
        )

    annotation = []
    for a, trait in enumerate(FS.columns):
        for i, col in enumerate(module_cols):
            axis_idx = a * n_cols + i + 1
            color = col[2:]  # "MMblue" -> "blue"
            in_module = MM["modColor"] == color
            name = MM[in_module].index
            x = abs(MM[in_module][col].values)
            y = abs(FS[FS.index.isin(name)][trait].values)

            # A regression needs at least two points; leave the panel empty
            # for modules with fewer features.
            if len(x) < 2:
                continue

            slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
            line = slope * x + intercept

            figure.append_trace(
                go.Scattergl(
                    x=x,
                    y=y,
                    text=name,
                    mode="markers",
                    opacity=0.7,
                    marker={
                        "size": 7,
                        "color": "white",
                        "line": {"width": 1.5, "color": color},
                    },
                ),
                a + 1,
                i + 1,
            )
            figure.append_trace(
                go.Scattergl(x=x, y=line, mode="lines", marker={"color": "black"}),
                a + 1,
                i + 1,
            )

            annotation.append(
                dict(
                    x=0.7,
                    y=0.7,
                    xref="x{}".format(axis_idx),
                    yref="y{}".format(axis_idx),
                    text="R={:0.2}, p={:.0e}".format(r_value, p_value),
                    showarrow=False,
                )
            )

    figure.layout.update(axis_dict)
    figure.layout.update(annotations=annotation)

    return figure
def plot_complex_dendrogram(
    dendro_df,
    subplot_df,
    title,
    dendro_labels=None,
    distfun="euclidean",
    linkagefun="average",
    hang=0.04,
    subplot="module colors",
    subplot_colorscale=None,
    color_missingvals=True,
    row_annotation=False,
    col_annotation=False,
    width=1000,
    height=800,
):
    """
    Plot a dendrogram with a subplot below that can be a heatmap (annotated
    or not) or module colors.

    :param dendro_df: pandas dataframe containing data used to generate
        dendrogram, columns will result in dendrogram leaves.
    :param subplot_df: pandas dataframe containing data used to generate
        plot below dendrogram (the module colors of the leaves when
        ``subplot`` is 'module colors').
    :param str title: the title of the figure.
    :param list dendro_labels: list of strings for dendrogram leaf nodes
        labels (defaults to an empty list).
    :param str distfun: distance measure to be used ('euclidean', 'maximum',
        'manhattan', 'canberra', 'binary', 'minkowski' or 'jaccard').
    :param str linkagefun: hierarchical/agglomeration method to be used
        ('single', 'complete', 'average', 'weighted', 'centroid', 'median'
        or 'ward').
    :param float hang: height at which the dendrogram leaves should be placed.
    :param str subplot: type of plot to be shown below the dendrogram
        ('module colors' or 'heatmap').
    :param list subplot_colorscale: colorscale to be used in the subplot
        (defaults to an empty list, i.e. the default of ``get_heatmap``).
    :param bool color_missingvals: if set to `True`, plots missing values as
        grey in the heatmap.
    :param bool row_annotation: if `True`, adds a color-coded column at the
        left of the heatmap.
    :param bool col_annotation: if `True`, adds a color-coded row at the
        bottom of the heatmap.
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :return: Plotly object figure, or an empty dictionary when no dendrogram
        could be computed or ``subplot`` is not a recognised value.
    """
    if dendro_labels is None:
        dendro_labels = []
    if subplot_colorscale is None:
        subplot_colorscale = []

    figure = {}
    dendro_tree = wgcna_analysis.get_dendrogram(
        dendro_df,
        dendro_labels,
        distfun=distfun,
        linkagefun=linkagefun,
        div_clusters=False,
    )
    if dendro_tree is None:
        return figure

    dendrogram_ = dendrogram.plot_dendrogram(
        dendro_tree, hang=hang, cutoff_line=False
    )
    dendro_traces = list(dendrogram_["data"])
    tickvals = dendrogram_["layout"]["xaxis"]["tickvals"]
    ticktext = dendrogram_["layout"]["xaxis"]["ticktext"]

    def add_traces(fig, traces, row, col):
        # Place every trace of a sub-figure into the same subplot cell.
        for trace in traces:
            fig.append_trace(trace, row, col)

    # Base layout shared by all variants; each branch below overrides the
    # axes it needs.
    layout = go.Layout(
        width=width,
        height=height,
        showlegend=False,
        title=title,
        xaxis=dict(
            domain=[0, 1],
            range=[np.min(tickvals) - 6, np.max(tickvals) + 4],
            showgrid=False,
            zeroline=True,
            ticks="",
            automargin=True,
            anchor="y",
        ),
        yaxis=dict(
            domain=[0.7, 1],
            autorange=True,
            showgrid=False,
            zeroline=False,
            ticks="outside",
            title="Height",
            automargin=True,
            anchor="x",
        ),
        xaxis2=dict(
            domain=[0, 1],
            autorange=True,
            showgrid=True,
            zeroline=False,
            ticks="",
            showticklabels=False,
            automargin=True,
            anchor="y2",
        ),
        yaxis2=dict(
            domain=[0, 0.64],
            autorange=True,
            showgrid=False,
            zeroline=False,
            automargin=True,
            anchor="x2",
        ),
    )

    if subplot == "module colors":
        figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
        add_traces(figure, dendro_traces, 1, 1)
        shapes = plot_dendrogram_guidelines(dendro_tree, dendrogram_)
        # The color strip IS the subplot in this mode, so it is always
        # requested. Forwarding the caller's col_annotation (False by
        # default) made the helper return None, which is not a valid trace.
        moduleColors = get_module_color_annotation(
            dendro_labels,
            col_annotation=True,
            bygene=True,
            module_colors=subplot_df,
            dendrogram=dendrogram_,
        )
        figure.append_trace(moduleColors, 2, 1)
        figure["layout"] = layout
        figure.layout.template = "plotly_white"
        figure["layout"].update(
            {
                "shapes": shapes,
                "xaxis": dict(showticklabels=False),
                "yaxis": dict(domain=[0.2, 1]),
                "yaxis2": dict(
                    domain=[0, 0.19],
                    title="Module colors",
                    ticks="",
                    showticklabels=False,
                ),
            }
        )

    elif subplot == "heatmap":
        # `inclusive=True` is rejected by pandas >= 2.0; the default of
        # `between` already includes both bounds in every pandas version.
        within_unit_range = all(
            subplot_df[col].between(-1, 1).all() for col in subplot_df.columns
        )
        if not within_unit_range:
            df = wgcna_analysis.get_percentiles_heatmap(
                subplot_df, dendro_tree, bydendro=True, bycols=False
            ).T
        else:
            df = wgcna_analysis.df_sort_by_dendrogram(
                wgcna_analysis.df_sort_by_dendrogram(subplot_df, dendro_tree).T,
                dendro_tree,
            )

        heatmap = get_heatmap(
            df, colorscale=subplot_colorscale, color_missing=color_missingvals
        )
        heatmap_traces = list(heatmap["data"])

        if row_annotation and col_annotation:
            figure = tools.make_subplots(
                rows=3,
                cols=2,
                specs=[[{"colspan": 2}, None], [{}, {}], [{"colspan": 2}, None]],
                print_grid=False,
            )
            add_traces(figure, dendro_traces, 1, 1)
            add_traces(figure, heatmap_traces, 2, 2)
            r_annot, c_annot = get_module_color_annotation(
                list(df.index),
                row_annotation=row_annotation,
                col_annotation=col_annotation,
                bygene=False,
            )
            figure.append_trace(r_annot, 2, 1)
            figure.append_trace(c_annot, 3, 1)
            figure["layout"] = layout
            figure.layout.template = "plotly_white"
            figure["layout"].update(
                {
                    "xaxis": dict(ticks="", showticklabels=False, anchor="y"),
                    "xaxis2": dict(
                        domain=[0, 0.01],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="y2",
                    ),
                    "xaxis3": dict(
                        domain=[0.015, 1],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="y3",
                    ),
                    "xaxis4": dict(
                        domain=[0.015, 1],
                        ticks="",
                        showticklabels=True,
                        automargin=True,
                        anchor="y4",
                    ),
                    "yaxis": dict(domain=[0.635, 1], automargin=True, anchor="x"),
                    "yaxis2": dict(
                        domain=[0.015, 0.635],
                        autorange="reversed",
                        ticks="",
                        showticklabels=True,
                        automargin=True,
                        anchor="x2",
                    ),
                    "yaxis3": dict(
                        domain=[0.01, 0.635],
                        autorange="reversed",
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="x3",
                    ),
                    "yaxis4": dict(
                        domain=[0, 0.01],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="x4",
                    ),
                }
            )

        elif not row_annotation and not col_annotation:
            figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
            add_traces(figure, dendro_traces, 1, 1)
            add_traces(figure, heatmap_traces, 2, 1)
            figure["layout"] = layout
            figure.layout.template = "plotly_white"
            figure.layout.update(
                {
                    "xaxis": dict(
                        ticktext=np.array(ticktext),
                        tickvals=list(tickvals),
                    ),
                    "yaxis2": dict(autorange="reversed"),
                }
            )

        elif row_annotation:
            figure = tools.make_subplots(
                rows=2,
                cols=2,
                specs=[[{"colspan": 2}, None], [{}, {}]],
                print_grid=False,
            )
            add_traces(figure, dendro_traces, 1, 1)
            add_traces(figure, heatmap_traces, 2, 2)
            r_annot = get_module_color_annotation(
                list(df.index),
                row_annotation=row_annotation,
                col_annotation=col_annotation,
                bygene=False,
            )
            figure.append_trace(r_annot, 2, 1)
            figure["layout"] = layout
            figure.layout.template = "plotly_white"
            figure["layout"].update(
                {
                    "xaxis": dict(
                        domain=[0.015, 1],
                        ticktext=np.array(ticktext),
                        tickvals=list(tickvals),
                        automargin=True,
                        anchor="y",
                    ),
                    "xaxis2": dict(
                        domain=[0, 0.010],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="y2",
                    ),
                    "xaxis3": dict(
                        domain=[0.015, 1],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="y3",
                    ),
                    "yaxis": dict(automargin=True, anchor="x"),
                    "yaxis2": dict(
                        autorange="reversed",
                        ticks="",
                        showticklabels=True,
                        automargin=True,
                        anchor="x2",
                    ),
                    "yaxis3": dict(
                        domain=[0, 0.64],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="x3",
                    ),
                }
            )

        elif col_annotation:
            figure = tools.make_subplots(
                rows=3, cols=1, specs=[[{}], [{}], [{}]], print_grid=False
            )
            add_traces(figure, dendro_traces, 1, 1)
            add_traces(figure, heatmap_traces, 3, 1)
            c_annot = get_module_color_annotation(
                list(df.index),
                row_annotation=row_annotation,
                col_annotation=col_annotation,
                bygene=False,
            )
            figure.append_trace(c_annot, 2, 1)
            figure["layout"] = layout
            figure.layout.template = "plotly_white"
            figure["layout"].update(
                {
                    "xaxis": dict(
                        ticktext=np.array(ticktext),
                        tickvals=list(tickvals),
                        automargin=True,
                        anchor="y",
                    ),
                    "xaxis2": dict(
                        ticks="", showticklabels=False, automargin=True, anchor="y2"
                    ),
                    "xaxis3": dict(
                        domain=[0, 1],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="y3",
                    ),
                    "yaxis": dict(domain=[0.70, 1], automargin=True, anchor="x"),
                    "yaxis2": dict(
                        domain=[0.615, 0.625],
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="x2",
                    ),
                    "yaxis3": dict(
                        domain=[0, 0.61],
                        autorange="reversed",
                        ticks="",
                        showticklabels=False,
                        automargin=True,
                        anchor="x3",
                    ),
                }
            )

    return figure