I need help cutting down on computation time. For the past 10 hours I have been trying in vain to use only 7 decimals instead of 63. I keep getting an error message saying that there is nothing to plot. From my limited understanding of Python 2.7, I believe that the plotting function is expecting float64 instead of float8. I have copy-pasted my Python 2.7 code below. Can somebody please change it so that it will plot float8 time series trajectories? Thanks a lot in advance.
Please see the properly formatted Python 2.7 code in the attached MS Word file, because it does not display well in this question's text editor box.
# Command-line tool that loads expression matrices from CSV files and times
# correlation queries against them.
#
# NOTE(review): the pasted source had lost all line breaks and indentation.
# The block structure below is reconstructed from syntax; confirm the nesting
# of the ``--save`` branch and of the progress print against the real file.
from __future__ import print_function

import argparse
import os
import timeit

import matplotlib.pyplot as plt
import pandas as pd

import conf.settings
import gpl
from correlation import CorrelationMatrix
from util import count_samples


class ExpressionMatrix(object):
    """Expression values plus annotation columns loaded from CSV files.

    The frame is read either from a single ``<series>.csv`` file or by
    concatenating the per-dataset files of a platform, all located under
    ``conf.settings.DATA_PATH``.  The first ``sample_number`` columns hold
    the sample values; the remaining columns are kept as annotations.
    """

    def __init__(self, platform=None, series=None, invert=False, limit=0,
                 top=10, **kwargs):
        """Load the expression data for a series or a platform.

        Args:
            platform: Identifier handed to ``gpl.Platform``; only used when
                ``series`` is not given.
            series: Base name of the CSV file to read (``<series>.csv``).
            invert: Stored on the instance as ``self.invert``.
            limit: Maximum number of platform datasets to concatenate;
                ``0`` means no limit.
            top: Stored on the instance as ``self.top``.
            **kwargs: Every extra keyword is stored as an instance
                attribute (``main`` passes all parsed CLI options here).

        Raises:
            ValueError: If ``platform`` is given but none of its dataset
                files exist in the data directory.
        """
        data_path = conf.settings.DATA_PATH
        self.sample_number = 0
        self.invert = invert
        self.top = top
        # Stays None when neither ``series`` nor ``platform`` is supplied.
        self.df = None

        if series:
            self.df, self.sample_number = self._load_series(data_path, series)
        elif platform:
            self.df, self.sample_number = self._load_platform(
                data_path, platform, limit)

        for key, value in kwargs.items():
            setattr(self, key, value)

        # ``unlog`` normally arrives through kwargs; default to False so the
        # class can also be built without it.
        if getattr(self, 'unlog', False) and self.df is not None:
            # Use the sample count of the final frame, not the count of the
            # last dataset that happened to be read.
            count = self.sample_number
            self.df.iloc[:, :count] = 2 ** self.df.iloc[:, :count]

    @staticmethod
    def _load_series(data_path, series):
        """Read ``<series>.csv`` and return it with float32 sample columns."""
        frame = pd.read_csv(os.path.join(data_path, series + '.csv'),
                            index_col=0)
        sample_number = count_samples(frame)
        print(frame.dtypes)
        print(frame.shape)
        # Assigning the converted values back through ``.iloc`` writes them
        # into the existing float64 columns, so the dtype would not change.
        # Rebuilding the frame from the converted block keeps float32.
        samples = frame.iloc[:, :sample_number].astype('float32')
        frame = pd.concat([samples, frame.iloc[:, sample_number:]], axis=1)
        # ``DataFrame.info`` prints its summary itself.
        frame.info()
        return frame, sample_number

    @staticmethod
    def _load_platform(data_path, platform, limit):
        """Concatenate the sample columns of the platform's dataset files."""
        # Use the ``platform`` argument rather than the module-level ``args``,
        # which only exists when this file is run as a script.
        gpl_platform = gpl.Platform(platform, parse=False, meta_only=True)
        matrix = None
        frame = None
        sample_number = 0
        count = 0
        for dataset in gpl_platform.get_series(download=False):
            file_path = os.path.join(data_path, dataset + '.csv')
            if not os.path.exists(file_path):
                file_path = os.path.join(data_path, dataset + '.tar.csv')
            if not os.path.exists(file_path):
                continue
            frame = pd.read_csv(file_path, index_col=0)
            count += 1
            sample_number = count_samples(frame)
            expression_matrix = frame.iloc[:, :sample_number]
            if matrix is None:
                matrix = expression_matrix
            else:
                matrix = pd.concat([matrix, expression_matrix], axis=1)
                print('Concated matrix: %s' % dataset, matrix.shape)
            # Stop once ``limit`` datasets are in; testing ``count > limit``
            # after the concat would let one extra dataset through.
            if limit and count >= limit:
                break

        if frame is None:
            raise ValueError(
                'No dataset files found for platform %s in %s'
                % (platform, data_path))

        # Annotation columns are taken from the last dataset that was read.
        annotations = frame.iloc[:, sample_number:]
        combined = pd.concat([matrix, annotations], axis=1)
        return combined, count_samples(combined)

    def correlations(self):
        """Return a ``CorrelationMatrix`` built from this instance."""
        return CorrelationMatrix(self)


def main(args):
    """Build or load the correlation matrix and time ``correlate`` calls.

    Args:
        args: Parsed command-line options (``argparse.Namespace``); all of
            them are forwarded to ``ExpressionMatrix`` as keywords.
    """
    expressions = ExpressionMatrix(**vars(args))
    if args.load:
        correlations = CorrelationMatrix(expressions, calc=False)
        correlations.load()
    else:
        correlations = expressions.correlations()
        # NOTE(review): assumed to apply only to a freshly computed matrix.
        if args.save:
            correlations.save()
    print(correlations.df.shape)
    print(args.similarity)

    times = []
    if args.choices:
        for _ in range(args.trials):
            start_time = timeit.default_timer()
            correlations.correlate(args.choices)
            times.append(timeit.default_timer() - start_time)
    # Skip the average when nothing was timed (no choices or ``--trials 0``).
    if times:
        print('Average duration: ', sum(times) / len(times))


def _build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--series', '-s', type=str)
    parser.add_argument('--platform', '-p', type=str)
    parser.add_argument('--invert', '-i', action='store_true')
    parser.add_argument('--choices', '-c', type=str, nargs='+', default='')
    parser.add_argument('--limit', '-l', type=int, default=0)
    parser.add_argument('--top', '-t', type=int, default=10)
    parser.add_argument(
        '--similarity', '-sim', type=str, default='pearson',
        help='Method of similarity measure which can be either pearson, '
             'kendall, spearman (default: pearson).')
    parser.add_argument('--trials', '-tr', type=int, default=1)
    parser.add_argument('--plot', '-plt', action='store_true')
    parser.add_argument('--unlog', '-ul', action='store_true')
    parser.add_argument('--save', '-sa', action='store_true')
    parser.add_argument('--load', '-lo', action='store_true')
    return parser


if __name__ == '__main__':
    args = _build_parser().parse_args()
    main(args)