#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script for performing the linkage calculations and outputting them to file
and plots.

The functions here are thin orchestration wrappers: the actual clustering,
statistics, plotting and file writing are delegated to the sibling modules
imported below.

@author: mark
"""

import logging

import solventmapcreator.clusteringanalysis.clustercalculation as clustercalculation
import solventmapcreator.clusteringanalysis.clusterstatistics as clusterstatistics
import solventmapcreator.clusteringanalysis.dendrogramplotting as dendrogramplotting
import solventmapcreator.io.linkagewriter as linkagewriter

logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.WARN)


def create_dend_and_stats_all_methods(condensed_matrix, labels, filename_stem, **kwargs):
    """This generates the linkage matrices for each method, and returns the
    dendrogram and linkage file and statistics for each method.

    The output files of each method are written using the stem
    ``filename_stem + '_' + method_label``.

    Args:
        condensed_matrix: condensed distance matrix to cluster.
        labels: labels passed through to the dendrogram plotting.
        filename_stem: common stem for all output filenames.
        **kwargs: forwarded unchanged to create_dendrogram_and_statistics.

    Returns:
        dict mapping each method label to the
        (linkage_out, dendrogram_out, coph_out) tuple returned by
        create_dendrogram_and_statistics for that method.
    """
    # NOTE(review): assumes clustering_by_all_methods returns a mapping of
    # method label -> linkage matrix; confirm against clustercalculation.
    linkage_matrix_dict = clustering_by_all_methods(condensed_matrix)
    output_dict = {}
    # Iterate over items(): both the label and its linkage matrix are needed.
    for method_label, linkage_matrix in linkage_matrix_dict.items():
        method_filename_stem = filename_stem + '_' + method_label
        output_dict[method_label] = create_dendrogram_and_statistics(
            linkage_matrix,
            condensed_matrix,
            labels,
            method_filename_stem,
            **kwargs
        )
    return output_dict


def create_dendrogram_and_statistics_for_method(method, condensed_matrix, labels,
                                                filename_stem, **kwargs):
    """This calculates the linkage matrix for the given method, then outputs the
    dendrogram, linkage file and statistics for that linkage matrix.

    Args:
        method: clustering method, passed to clustering_given_method.
        condensed_matrix: condensed distance matrix to cluster.
        labels: labels passed through to the dendrogram plotting.
        filename_stem: stem for the output filenames.
        **kwargs: forwarded unchanged to create_dendrogram_and_statistics.

    Returns:
        The (linkage_out, dendrogram_out, coph_out) tuple returned by
        create_dendrogram_and_statistics.
    """
    linkage_matrix = clustering_given_method(condensed_matrix, method)
    return create_dendrogram_and_statistics(linkage_matrix, condensed_matrix,
                                            labels, filename_stem, **kwargs)


def create_dendrogram_and_statistics(linkage_matrix, condensed_matrix, labels,
                                     filename_stem, **kwargs):
    """This generates the dendrogram and linkage files and also calculates the
    statistics.

    Args:
        linkage_matrix: linkage matrix to plot, write and analyse.
        condensed_matrix: condensed distance matrix the linkage was built from.
        labels: labels passed through to the dendrogram plotting.
        filename_stem: stem for the output filenames.
        **kwargs: forwarded to both the dendrogram creation and the
            cophenetic/inconsistency calculation.

    Returns:
        Tuple (linkage_out, dendrogram_out, coph_out) of the values returned
        by the linkage writer, the dendrogram plotting and the
        cophenetic/inconsistency writer respectively.
    """
    linkage_out, dendrogram_out = create_and_write_dendrogram_and_link_file(
        linkage_matrix,
        labels,
        filename_stem,
        **kwargs
    )
    coph_filename = generate_cophenetic_filename(filename_stem)
    coph_out = generate_cophenetic_and_inconsistency_file(linkage_matrix,
                                                          condensed_matrix,
                                                          coph_filename, **kwargs)
    return linkage_out, dendrogram_out, coph_out


def create_and_write_dendrogram_and_link_file(linkage_matrix, labels, filename_stem,
                                              **kwargs):
    """This generates the dendrogram and the linkage file.

    The dendrogram is created first (using the bare stem), then the linkage
    matrix is written to the filename from generate_linkage_filename.

    Returns:
        Tuple (linkage_out, dendrogram_out).
    """
    linkage_filename = generate_linkage_filename(filename_stem)
    dendrogram_out = create_and_write_dendogram(linkage_matrix, labels,
                                                filename_stem, **kwargs)
    linkage_out = write_linkage_matrix(linkage_matrix, linkage_filename)
    return linkage_out, dendrogram_out


def generate_cophenetic_and_inconsistency_file(linkage_matrix,
                                               condensed_distance_matrix,
                                               filename, **kwargs):
    """This calculates the cophenetic distances and coefficient for the given
    matrix and the inconsistency statistics and outputs to file.

    Args:
        linkage_matrix: linkage matrix to analyse.
        condensed_distance_matrix: condensed distance matrix the linkage was
            built from.
        filename: file the statistics are written to.
        **kwargs: forwarded to calculate_cophenetic_and_inconsistency.

    Returns:
        Whatever write_cophenetic_and_inconsistency returns.
    """
    coph_incon_dict = calculate_cophenetic_and_inconsistency(linkage_matrix,
                                                             condensed_distance_matrix,
                                                             **kwargs)
    return write_cophenetic_and_inconsistency(coph_incon_dict["cophenetic_coeff"],
                                              coph_incon_dict["cophenetic_distances"],
                                              coph_incon_dict["inconsistency_matrix"],
                                              filename)


def generate_cophenetic_filename(filename_stem):
    """This generates the cophenetic and inconsistency filename from the given
    stem, by appending '_coph_inconst.csv'.
    """
    return filename_stem + '_coph_inconst.csv'


def generate_linkage_filename(filename_stem):
    """This generates the filename for the linkage file, by appending '.csv'
    to the given stem.
    """
    return filename_stem + '.csv'


def create_and_write_dendogram(linkage_matrix, labels, filename_stem, **kwargs):
    """This creates a dendrogram plot and writes to file.

    Delegates to dendrogramplotting.create_and_write_dendogram and returns its
    result.
    """
    return dendrogramplotting.create_and_write_dendogram(linkage_matrix, labels,
                                                         filename_stem, **kwargs)


def write_linkage_matrix(linkage_matrix, filename):
    """This writes the linkage matrix to file.

    Delegates to linkagewriter.write_linkage_matrix and returns its result.
    """
    return linkagewriter.write_linkage_matrix(linkage_matrix, filename)


def write_cophenetic_and_inconsistency(cophenetic_coefficient,
                                       cophenetic_distances,
                                       inconsistency_matrix, filename):
    """This writes out the cophenetic and inconsistency information.

    Delegates to linkagewriter.write_cophenetic_and_inconsistency and returns
    its result.
    """
    return linkagewriter.write_cophenetic_and_inconsistency(cophenetic_coefficient,
                                                            cophenetic_distances,
                                                            inconsistency_matrix,
                                                            filename)


def calculate_cophenetic_and_inconsistency(linkage_matrix, condensed_distance_matrix,
                                           **kwargs):
    """This calculates the cophenetic distances and coefficient for the given
    matrix and the inconsistency statistics.

    Args:
        linkage_matrix: linkage matrix to analyse.
        condensed_distance_matrix: condensed distance matrix the linkage was
            built from.
        **kwargs: forwarded to calculate_inconsistency (which reads "d").

    Returns:
        dict with the keys "cophenetic_coeff", "cophenetic_distances" and
        "inconsistency_matrix".
    """
    cophenetic_coeff, cophenetic_distances = calculate_cophenetic_distance(
        linkage_matrix,
        condensed_distance_matrix
    )
    inconsistency_matrix = calculate_inconsistency(linkage_matrix, **kwargs)
    return {"cophenetic_coeff": cophenetic_coeff,
            "cophenetic_distances": cophenetic_distances,
            "inconsistency_matrix": inconsistency_matrix}


def calculate_inconsistency(linkage_matrix, **kwargs):
    """This calculates the inconsistency statistics on the linkage matrix.

    The optional keyword argument "d" is passed on as the second argument of
    clusterstatistics.calculate_inconsistency; it defaults to 2 when absent.
    Any other keyword arguments are ignored.
    """
    return clusterstatistics.calculate_inconsistency(linkage_matrix, kwargs.get("d", 2))


def calculate_cophenetic_distance(linkage_matrix, condensed_distance_matrix):
    """This calculates the cophenetic distances and coefficient for the given
    matrix.

    Delegates to clusterstatistics.calculate_cophenetic_distance and returns
    its result.
    """
    return clusterstatistics.calculate_cophenetic_distance(linkage_matrix,
                                                           condensed_distance_matrix)


def clustering_given_method(condensed_distance_matrix, method):
    """This calculates the linkage matrix using the given method only.

    Delegates to clustercalculation.calculate_clustering and returns its
    result.
    """
    return clustercalculation.calculate_clustering(condensed_distance_matrix, method)


def clustering_by_all_methods(condensed_distance_matrix):
    """This calculates the clustering for each method.

    Delegates to clustercalculation.clustering_by_all_methods and returns its
    result.
    """
    return clustercalculation.clustering_by_all_methods(condensed_distance_matrix)