models.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. import tensorflow as tf
  2. from keras.layers import Input, Conv3D, MaxPooling3D, Dropout, BatchNormalization, Reshape, Dense, ELU, concatenate, add, Lambda, MaxPooling2D, GRU, Masking, advanced_activations
  3. from keras.models import Model, save_model
  4. from keras.optimizers import Adam
  5. from keras import backend as K
  6. from tensorflow import reshape, transpose
  7. from keras.callbacks import LearningRateScheduler
  8. from keras.metrics import binary_crossentropy
  9. from keras import activations
  10. import numpy as np
  11. import math
  12. from keras import activations
  13. from keras.utils import CustomObjectScope
  14. from skimage.transform import resize
  15. from tensorflow.python.framework import ops
  16. import innvestigate
  17. import innvestigate.utils
  18. import os
  19. import sys
  20. sys.path.append('//data/data_wnx3/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/utils')
  21. from sepconv3D import SeparableConv3D
  22. from augmentation import CustomIterator
  23. model_filepath = '//data/data_wnx3/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data'
  24. ####for 2 class model + RNN###
  25. class Parameters():
  26. def __init__ (self, param_dict):
  27. self.CNN_w_regularizer = param_dict['CNN_w_regularizer']
  28. self.RNN_w_regularizer = param_dict['RNN_w_regularizer']
  29. self.CNN_batch_size = param_dict['CNN_batch_size']
  30. self.RNN_batch_size = param_dict['RNN_batch_size']
  31. self.CNN_drop_rate = param_dict['CNN_drop_rate']
  32. self.RNN_drop_rate = param_dict['RNN_drop_rate']
  33. self.epochs = param_dict['epochs']
  34. self.gpu = param_dict['gpu']
  35. self.model_filepath = param_dict['model_filepath'] + '/net.h5'
  36. self.num_clinical = param_dict['num_clinical']
  37. self.image_shape = param_dict['image_shape']
  38. self.final_layer_size = param_dict['final_layer_size']
  39. self.optimizer = param_dict['optimizer']
  40. class CNN_Net ():
  41. def __init__ (self, params):
  42. self.params = params
  43. self.xls = Input (shape = (self.params.num_clinical,),name='input_xls')
  44. self.mri = Input (shape = (self.params.image_shape),name='input_mri')
  45. self.jac = Input (shape = (self.params.image_shape),name='input_jac')
  46. xalex3D = XAlex3D(w_regularizer = self.params.CNN_w_regularizer, drop_rate = self.params.CNN_drop_rate, final_layer_size=self.params.final_layer_size)
  47. with tf.device(self.params.gpu):
  48. self.fc_CNN = xalex3D (self.mri, self.jac, self.xls)
  49. self.CNNoutput_class = Dense(units = 2, activation = 'softmax', name = 'CNNclass_output') (self.fc_CNN) #use either 1, sigmoid, binarycrossent OR 2, softmax, sparsecategoricalcrossent
  50. def train (self, data):
  51. train_data, val_data = data
  52. train_samples = train_data[0].shape[0]
  53. val_samples = len(val_data[0])
  54. data_flow_train = CustomIterator (train_data, batch_size = self.params.CNN_batch_size,
  55. shuffle = True)
  56. data_flow_val = CustomIterator (val_data, batch_size = self.params.CNN_batch_size,
  57. shuffle = True)
  58. self.model = Model(inputs = [self.mri,self.jac,self.xls], outputs = [self.CNNoutput_class])
  59. lrate = LearningRateScheduler(step_decay_CNN)
  60. callback = [lrate]
  61. #optimizer = Adam(lr=1e-5)
  62. self.optimizer = self.params.optimizer
  63. self.model.compile(optimizer = self.optimizer, loss = 'sparse_categorical_crossentropy', metrics =['acc'])
  64. self.model.summary()
  65. history = self.model.fit_generator (data_flow_train,
  66. steps_per_epoch = train_samples/self.params.CNN_batch_size,
  67. epochs = self.params.epochs,
  68. callbacks = callback,
  69. shuffle = True, #might be being ignored if the input data is a generator??
  70. validation_data = data_flow_val,
  71. validation_steps = val_samples/self.params.CNN_batch_size)
  72. #Save the model
  73. save_model(self.model,'SavedCNNModel')
  74. self.model.save_weights('SavedCNNWeights')
  75. #get features from last layer
  76. featuresModel = Model(inputs = self.model.input, outputs = self.model.layers[-2].output)
  77. featuresModel.compile(optimizer = self.params.optimizer, loss = 'sparse_categorical_crossentropy', metrics =['acc'])
  78. return history.history, featuresModel
  79. def predict (self, data_test):
  80. test_mri, test_jac, test_xls, test_labels, test_ptid, test_imageID, test_confid, test_csf = data_test
  81. # with open('//data/data_wnx3/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class/figchecks/test_data.txt', 'w') as testdata:
  82. # testdata.write('{}\n'.format(test_data))
  83. # testdata.write('{}\n{}\n{}\n{}\n{}\n'.format(len(test_data),len(test_data[0]),len(test_data[1]),len(test_data[2]),len(test_data[3])))
  84. preds = self.model.predict ([test_mri, test_jac, test_xls])
  85. return preds
  86. def evaluate (self, data_test):
  87. test_mri, test_jac, test_xls, test_labels, test_ptid, test_imageID, test_confid, test_csf = data_test
  88. metrics = self.model.evaluate (x = [test_mri, test_jac, test_xls], y = test_labels, batch_size = self.params.CNN_batch_size)
  89. return metrics
  90. def load_the_weights (self, SavedWeights):
  91. self.model = Model(inputs = [self.mri,self.jac,self.xls], outputs = [self.CNNoutput_class])
  92. self.model.compile(optimizer = self.params.optimizer, loss = 'sparse_categorical_crossentropy', metrics =['acc'])
  93. loaded = self.model.load_weights(SavedWeights)
  94. return loaded
  95. def LRP_heatmap(self, img_data, img_number): #https://github.com/albermax/innvestigate
  96. test_mri, test_jac, test_xls, test_labels, test_ptid, test_imageID, test_confid, test_csf = img_data
  97. #clear some memory: (these are just pointers anyway?)
  98. test_labels=0
  99. test_ptid=0
  100. test_imageID=0
  101. test_confid=0
  102. test_csf=0
  103. print('kill check models130')
  104. #create the model without the final softmax layer
  105. nosoftmax_model = innvestigate.utils.model_wo_softmax(self.model)
  106. print('kill check models135')
  107. #create the analyzer
  108. analyzer = innvestigate.create_analyzer("lrp.z",nosoftmax_model,disable_model_checks=True)
  109. print('kill check models138')
  110. ##analyzer = innvestigate.analyzer.LRPZ(nosoftmax_model,disable_model_checks=True)
  111. #analyze
  112. analysis = analyzer.analyze([[test_mri[img_number]],[test_jac[img_number]],[test_xls[img_number]]])
  113. print('shape of initial LRP heatmap: ', analysis.shape)
  114. analysis /= np.max(np.abs(analysis))
  115. analysis = np.squeeze(analysis,0)
  116. print('shape of squeezed LRP heatmap: ', analysis.shape)
  117. #analysis = np.moveaxis(analysis,0,3)
  118. #analysis = resize(analysis,(test_mri[img_number].shape)) #maybe this is actually turning the 1 into 91 instead of moving it to the end. maybe try a channels last adjustment or something??
  119. #analysis = analysis[:,:,:,0,:]
  120. print('shape of resized LRP heatmap: ', analysis.shape)
  121. return analysis
  122. def make_gradcam_heatmap2(self,img_data,img_number): #https://towardsdatascience.com/demystifying-convolutional-neural-networks-using-gradcam-554a85dd4e48 and https://keras.io/examples/vision/grad_cam/
  123. test_mri, test_jac, test_xls, test_labels, test_ptid, test_imageID, test_confid, test_csf = img_data
  124. last_conv = self.model.layers[-15] #.get_layer('name')
  125. grads = K.gradients(self.model.output[:,1],last_conv.output)[0] #[:,x] corresponds to the class I want I think? So AD = 0
  126. #print('grads: ', grads)
  127. print('shape of grads: ', grads.shape)
  128. pooled_grads = K.mean(grads,axis=(0,1,2,3))
  129. print('shape of pooled_grads: ', pooled_grads.shape)
  130. iterate = K.function([self.model.input[0],self.model.input[1],self.model.input[2]],[pooled_grads,last_conv.output[0]])
  131. print('shape of test_mri[j]: ', test_mri[img_number].shape)
  132. pooled_grads_value,conv_layer_output = iterate([[test_mri[img_number]],[test_jac[img_number]],[test_xls[img_number]]])
  133. for i in range(48): #range = size of conv layer units? aka number of filters/channels
  134. conv_layer_output[:,:,:,i] *= pooled_grads_value[i] #conv_layer_output[:,:,i] #multiplies feature maps with pooled grads
  135. heatmap = np.mean(conv_layer_output,axis=-1) #takes the mean over all the filters/channels to get just one map
  136. #for x in range(heatmap.shape[0]): #this chunk applies a relu to keep only the features that have a positive influence on the output map
  137. # for y in range(heatmap.shape[1]):
  138. # heatmap[x,y] = np.max(heatmap[x,y],0)
  139. print('shape of initial heatmap: ', heatmap.shape)
  140. heatmap = np.maximum(heatmap,0) #keeps only the positive values (only keep the features that have a positive influence on the output map)
  141. heatmap /= np.max(heatmap) #normalizes the heatmap to 0-1 #do I actually want to normalize the same way I normed my images? (x-mean)/std. or min-max?
  142. heatmap = resize(heatmap,(test_mri[img_number].shape))
  143. print('shape of resized heatmap: ', heatmap.shape)
  144. return heatmap
  145. def guided_backprop(self, img_data, img_number):
  146. """Guided Backpropagation method for visualizing input saliency."""
  147. #define new model which changes gradient fn for all relu activations acording to Guided Backpropagation
  148. if "GuidedBackProp" not in ops._gradient_registry._registry:
  149. @ops.RegisterGradient("GuidedBackProp")
  150. def _GuidedBackProp(op, grad):
  151. dtype = op.inputs[0].dtype
  152. return grad * tf.cast(grad > 0., dtype) * \
  153. tf.cast(op.inputs[0] > 0., dtype)
  154. g = tf.get_default_graph()
  155. with g.gradient_override_map({'Relu': 'GuidedBackProp'}):
  156. new_model = self.model
  157. test_mri, test_jac, test_xls, test_labels, test_ptid, test_imageID, test_confid, test_csf = img_data
  158. layer_output = new_model.layers[-15].output
  159. grads = K.gradients(layer_output, [new_model.input[0],new_model.input[1],new_model.input[2]])[0]
  160. backprop_fn = K.function([new_model.input[0],new_model.input[1],new_model.input[2], K.learning_phase()], [grads])
  161. grads_val = backprop_fn([[test_mri[img_number]],[test_jac[img_number]],[test_xls[img_number]], 0])[0]
  162. print('shape of initial gb: ', grads_val.shape)
  163. #grads_val = resize(grads_val,(test_mri[img_number].shape))
  164. grads_val = grads_val[0]
  165. print('shape of resized gb: ', grads_val.shape)
  166. return grads_val
  167. class RNN_Net ():
  168. def __init__ (self, params):
  169. self.params = params
  170. self.fc_CNNt1 = Input (shape = (self.params.final_layer_size,)) #Value corresponds to size of final layer in CNN
  171. self.fc_CNNt2 = Input (shape = (self.params.final_layer_size,))
  172. self.fc_CNNt3 = Input (shape = (self.params.final_layer_size,))
  173. #self.fc_CNN = Input (shape = (3,self.params.final_layer_size,)) #for rnn_bgrus_multiTP
  174. rnn = rnn_bgrus(drop_rate=self.params.RNN_drop_rate, final_layer_size = self.params.final_layer_size, kernel_regularizer=self.params.RNN_w_regularizer)
  175. with tf.device(self.params.gpu):
  176. self.fc_RNN = Lambda(rnn, name='rnn')([self.fc_CNNt1,self.fc_CNNt2,self.fc_CNNt3]) #original call without prior masking
  177. #self.fc_RNN = Lambda(rnn, name='rnn')(self.fc_CNN) #for multi-TP gru
  178. print('Shape of self.fc_RNN: ', self.fc_RNN.shape)
  179. self.RNNoutput_class= Dense(units = 2, activation = 'softmax', name = 'RNNclass_output') (self.fc_RNN) #switch to sigmoid from softmax? (for 2 class? for non-sparse categorical?) #back to softmax for multi-class
  180. def train (self, data):
  181. train_data, train_labels, val_data, val_labels = data
  182. #data is now loaded in keeping all scans from same patient aligned across timepoints.
  183. #That way train_labels can be just one array which applies to all timepoints (necessary because I only ask for 1 output when I define the model!)
  184. print('train data shape: ', train_data[0].shape)
  185. print('train labels shape: ', train_labels.shape)
  186. # self.fc_CNNt1,self.fc_CNNt2,self.fc_CNNt3, train_dataT1,train_dataT2,train_dataT3 = train_data
  187. train_samples = train_data[0].shape[0]
  188. # val_samples = len(val_data[0])
  189. self.model = Model(inputs = [self.fc_CNNt1,self.fc_CNNt2,self.fc_CNNt3], outputs = [self.RNNoutput_class])
  190. #self.model = Model(inputs = [self.fc_CNN], outputs = [self.RNNoutput_class]) #for multi-TP gru
  191. lrate = LearningRateScheduler(step_decay_RNN)
  192. callback = [lrate]
  193. #optimizer = Adam(lr=1e-5)
  194. self.optimizer = self.params.optimizer
  195. self.model.compile(optimizer = self.optimizer, loss = 'sparse_categorical_crossentropy', metrics =['acc'])
  196. self.model.summary()
  197. history = self.model.fit(x = train_data,
  198. y = train_labels,
  199. batch_size = self.params.RNN_batch_size, #can change this now ...previously seemed to have to be 1...? Otherwise got a mismatch in my last layer being [1,2] instead of [None,2]
  200. epochs = self.params.epochs,
  201. callbacks = callback,
  202. shuffle = True,
  203. verbose = 1,
  204. #steps_per_epoch = int(train_samples/self.params.batch_size))#,
  205. validation_data = (val_data,val_labels))
  206. #Save the model
  207. save_model(self.model,'SavedRNNModel')
  208. self.model.save_weights('SavedRNNWeights')
  209. return history.history
  210. def predict (self, data_test):
  211. test_data, test_labels = data_test
  212. # with open('//data/data_wnx3/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class/figchecks/test_data.txt', 'w') as testdata:
  213. # print(test_mri)
  214. print('shape of test_predsT1 inside predict: ' , test_data[0].shape)
  215. # print('test_predsT1 inside predict: ' , test_data[0])
  216. preds = self.model.predict (test_data, batch_size=self.params.RNN_batch_size)
  217. return preds
  218. def evaluate (self, data_test):
  219. # test_mri, test_jac, test_xls, test_labels = data_test
  220. test_data, test_labels = data_test
  221. metrics = self.model.evaluate (x = test_data, y = test_labels, batch_size = self.params.RNN_batch_size) #self.params.batch_size?
  222. return metrics
  223. def load_the_weights (self, SavedWeights):
  224. self.model = Model(inputs = [self.fc_CNNt1,self.fc_CNNt2,self.fc_CNNt3], outputs = [self.RNNoutput_class])
  225. self.model.compile(optimizer = self.params.optimizer, loss = 'sparse_categorical_crossentropy', metrics =['acc'])
  226. loaded = self.model.load_weights(SavedWeights)
  227. return loaded
def XAlex3D(w_regularizer = None, drop_rate = 0., final_layer_size = 50) :
    """Return a function that builds the 3D multi-modal CNN feature extractor.

    (Refer to fig. 4 for a chain graph of the architecture.)

    Args:
        w_regularizer: kernel regularizer forwarded to every conv / dense block.
        drop_rate: dropout rate forwarded to every block.
        final_layer_size: number of units of the final ``'features'`` layer.

    Returns:
        ``f(mri_volume, mri_volume_jacobian, clinical_inputs)``, which returns
        the output tensor of the ``'features'`` Dense layer.

    NOTE: the statement order below fixes the order in which layers are
    created; other code in this file indexes ``model.layers`` by position, so
    do not reorder without re-checking those indices.
    """
    ###Create the CNN architecture
    def f(mri_volume, mri_volume_jacobian, clinical_inputs):
        # mri_volume_jacobian is accepted but currently unused: the parallel
        # "right" branch that consumed it is disabled.
        #First conv layers
        conv1_left = _conv_bn_relu_pool_drop(192, 11, 13, 11, strides = (4, 4, 4), w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (mri_volume)
        #conv1_right = _conv_bn_relu_pool_drop(48, 15, 18, 15, strides = (4, 4, 4), w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (mri_volume_jacobian)
        #Second layer
        conv2_left =_conv_bn_relu_pool_drop(384, 5, 6, 5, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv1_left)
        #conv2_right =_conv_bn_relu_pool_drop(96, 5, 6, 5, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv1_right)
        #conv2_concat = concatenate([conv2_left, conv2_right], axis = -1)
        #Third layer (disabled)
        #conv3_left =_conv_bn_relu_pool_drop(96, 3, 4, 3, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv2_left)
        #conv3_right =_conv_bn_relu_pool_drop(96, 3, 4, 3, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv2_right)
        #conv3_concat = concatenate([conv2_left, conv2_right], axis = -1)
        #Introduce Middle Flow (separable convolutions with a residual connection)
        print('residual shape '+str(conv2_left.shape))
        # Input is conv2_left (previously conv2_concat). filters must equal the
        # channel count of conv2_left (384) for the residual add in mid_flow.
        conv_mid_1 = mid_flow (conv2_left, drop_rate, w_regularizer, filters = 384)
        # conv_mid_2 = mid_flow (conv_mid_1, drop_rate, w_regularizer, filters = 192)
        #Split channels for grouped-style convolution: the last axis is cut at
        #index 192, i.e. into two halves of the 384 channels.
        conv_mid_1_1 = Lambda (lambda x:x[:,:,:,:,:192]) (conv_mid_1 )
        conv_mid_1_2 = Lambda (lambda x:x[:,:,:,:,192:]) (conv_mid_1 )
        conv5_left = _conv_bn_relu_pool_drop (96, 3, 4, 3, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv_mid_1_1)
        conv5_right = _conv_bn_relu_pool_drop (96, 3, 4, 3, w_regularizer = w_regularizer, drop_rate = drop_rate, pool=True) (conv_mid_1_2)
        conv6_left = _conv_bn_relu_pool_drop (48, 3, 4, 3, w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (conv5_left)
        conv6_right = _conv_bn_relu_pool_drop (48, 3, 4, 3, w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (conv5_right)
        #conv7_left = _conv_bn_relu_pool_drop (16, 3, 4, 3, w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (conv6_left)
        #conv7_right = _conv_bn_relu_pool_drop (16, 3, 4, 3, w_regularizer = w_regularizer,drop_rate = drop_rate, pool=True) (conv6_right)
        # Rejoin the two 48-filter branches along the channel axis.
        conv6_concat = concatenate([conv6_left, conv6_right], axis = -1)
        #convExtra = Conv3D(48, (20,30,20),
        #                   strides = (1,1,1), kernel_initializer="he_normal",
        #                   padding="same", kernel_regularizer = w_regularizer)(conv6_concat)
        #Flatten 3D conv network representations: every non-batch dimension is
        #multiplied into a single axis.
        flat_conv_6 = Reshape((np.prod(K.int_shape(conv6_concat)[1:]),))(conv6_concat)
        #2-layer Dense network for clinical features
        vol_fc1 = _fc_bn_relu_drop(64, w_regularizer = w_regularizer,
                                   drop_rate = drop_rate)(clinical_inputs)
        flat_volume = _fc_bn_relu_drop(20, w_regularizer = w_regularizer,
                                       drop_rate = drop_rate)(vol_fc1)
        #Combine image and clinical features embeddings
        fc1 = _fc_bn_relu_drop (20, w_regularizer, drop_rate = drop_rate, name='final_conv') (flat_conv_6)
        #fc2 = _fc_bn_relu_drop (40, w_regularizer, drop_rate = drop_rate) (fc1)
        flat = concatenate([fc1, flat_volume])
        #Final embedding with final_layer_size units (linear activation)
        fc2 = Dense(units = final_layer_size, activation = 'linear', kernel_regularizer=w_regularizer, name='features') (flat)
        #fc2 = _fc_bn_relu_drop (final_layer_size, w_regularizer, drop_rate = drop_rate) (flat) #this was the original final layer
        return fc2
    return f
  276. ###Define pieces of CNN
  277. def _fc_bn_relu_drop (units, w_regularizer = None, drop_rate = 0., name = None):
  278. #Defines Fully connected block (see fig. 3 in paper)
  279. def f(input):
  280. fc = Dense(units = units, activation = 'linear', kernel_regularizer=w_regularizer, name = name) (input) #was linear activation
  281. fc = BatchNormalization()(fc)
  282. fc = ELU()(fc)
  283. fc = Dropout (drop_rate) (fc)
  284. return fc
  285. return f
  286. def _conv_bn_relu_pool_drop(filters, height, width, depth, strides=(1, 1, 1), padding = 'same', w_regularizer = None,
  287. drop_rate = None, name = None, pool = False):
  288. #Defines convolutional block (see fig. 3 in paper)
  289. def f(input):
  290. conv = Conv3D(filters, (height, width, depth),
  291. strides = strides, kernel_initializer="he_normal",
  292. padding=padding, kernel_regularizer = w_regularizer, name = name)(input)
  293. norm = BatchNormalization()(conv)
  294. elu = ELU()(norm)
  295. if pool == True:
  296. elu = MaxPooling3D(pool_size=3, strides=2, padding = 'same') (elu)
  297. return Dropout(drop_rate) (elu)
  298. return f
  299. def _sepconv_bn_relu_pool_drop (filters, height, width, depth, strides = (1, 1, 1), padding = 'same', depth_multiplier = 1, w_regularizer = None,
  300. drop_rate = None, name = None, pool = False):
  301. #Defines separable convolutional block (see fig. 3 in paper)
  302. def f (input):
  303. sep_conv = SeparableConv3D(filters, (height, width, depth),
  304. strides = strides, depth_multiplier = depth_multiplier,kernel_initializer="he_normal",
  305. padding=padding, kernel_regularizer = w_regularizer, name = name)(input)
  306. sep_conv = BatchNormalization()(sep_conv)
  307. elu = ELU()(sep_conv)
  308. if pool == True:
  309. elu = MaxPooling2D(pool_size=3, strides=2, padding = 'same') (elu)
  310. return Dropout(drop_rate) (elu)
  311. return f
  312. def mid_flow (x, drop_rate, w_regularizer, filters):
  313. #3 consecutive separable blocks with a residual connection (refer to fig. 4)
  314. residual = x
  315. x = _sepconv_bn_relu_pool_drop (filters, 3, 3, 3, padding='same', depth_multiplier = 1, drop_rate=drop_rate, w_regularizer = w_regularizer)(x)
  316. x = _sepconv_bn_relu_pool_drop (filters, 3, 3, 3, padding='same', depth_multiplier = 1, drop_rate=drop_rate, w_regularizer = w_regularizer)(x)
  317. x = _sepconv_bn_relu_pool_drop (filters, 3, 3, 3, padding='same', depth_multiplier = 1, drop_rate=drop_rate, w_regularizer = w_regularizer)(x)
  318. # print('x shape '+str(x.shape))
  319. x = add([x, residual])
  320. x = ELU()(x)
  321. return x
  322. def step_decay_CNN (epoch):
  323. #Decaying learning rate function
  324. initial_lrate = 4e-4
  325. drop = 0.3
  326. epochs_drop = 10.0
  327. lrate = initial_lrate * math.pow(drop,((1+epoch)/epochs_drop))
  328. return lrate
  329. def step_decay_RNN (epoch):
  330. #Decaying learning rate function
  331. initial_lrate = 2e-3
  332. drop = 0.3
  333. epochs_drop = 10.0
  334. lrate = initial_lrate * math.pow(drop,((1+epoch)/epochs_drop))
  335. return lrate
  336. ###Create the RNN
def rnn_bgrus (drop_rate,final_layer_size, kernel_regularizer=None, mask=None):
    """Return a function that runs three stacked bidirectional-GRU stages per timepoint.

    Args:
        drop_rate: dropout rate for every GRU (input dropout) and for the
            final Dropout layer.
        final_layer_size: width of each incoming feature vector.
        kernel_regularizer: kernel regularizer passed to every GRU.
        mask: accepted but never used in the body.

    Returns:
        ``f(inputs)`` where ``inputs`` is indexable as three tensors (T1, T2,
        T3).  ``f`` returns the output of a 20-unit linear Dense layer
        followed by dropout.

    NOTE(review): the caller in this file wraps ``f`` in a ``Lambda`` layer,
    so the GRU / Dense layers below are instantiated inside that Lambda -
    verify that their weights are tracked, trained and saved by the
    enclosing model.
    """
    def f(inputs):
        fc2T1 = inputs[0]
        print ('fc2T1_ogShape: ', fc2T1.shape)
        print ('fc2T1_ogShape[0]: ', fc2T1.shape[0])
        fc2T2 = inputs[1]
        fc2T3 = inputs[2]
        batch = K.shape(fc2T1)[0] # dynamic number of samples in T1 (assumed equal for T2 and T3)
        unitsA = 100
        unitsB = 100
        unitsC = 100
        # Reshape each timepoint to (batch, 1, final_layer_size): GRU needs
        # input of the form (batch, timesteps, features); here timesteps == 1.
        fc2T1 = tf.reshape(fc2T1,(batch,1,final_layer_size))
        fc2T2 = tf.reshape(fc2T2,(batch,1,final_layer_size))
        fc2T3 = tf.reshape(fc2T3,(batch,1,final_layer_size))
        # Masking layers to handle missing data, using -1 as the mask value.
        # NOTE(review): these are applied to raw tensors inside a Lambda;
        # confirm the mask actually reaches the GRUs.
        fc2T1_mask = Masking(mask_value=-1, input_shape=(1,final_layer_size))(fc2T1)
        fc2T2_mask = Masking(mask_value=-1, input_shape=(1,final_layer_size))(fc2T2)
        fc2T3_mask = Masking(mask_value=-1, input_shape=(1,final_layer_size))(fc2T3)
        print('fc2T1_masked: ', fc2T1_mask)
        # first BGRU (a): one forward and one backward GRU per timepoint.
        # return_sequences is left at its default, so each GRU yields one
        # vector per sample; the reshapes below restore a length-1 time axis.
        a_forwardT1 = GRU(unitsA,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T1_mask)
        a_backwardT1 = GRU(unitsA, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T1_mask)
        a_forwardT2 = GRU(unitsA,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T2_mask)
        a_backwardT2 = GRU(unitsA, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T2_mask)
        a_forwardT3 = GRU(unitsA,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T3_mask)
        a_backwardT3 = GRU(unitsA, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(fc2T3_mask)
        a_gruT1 = concatenate([a_forwardT1, a_backwardT1], axis=-1)
        a_gruT2 = concatenate([a_forwardT2, a_backwardT2], axis=-1)
        a_gruT3 = concatenate([a_forwardT3, a_backwardT3], axis=-1)
        # reshape to (batch, 1, 2*unitsA) so the next GRU stage can consume it
        a_gruT1 = tf.reshape(a_gruT1,(batch,1,unitsA*2))
        a_gruT2 = tf.reshape(a_gruT2,(batch,1,unitsA*2))
        a_gruT3 = tf.reshape(a_gruT3,(batch,1,unitsA*2))
        # second BGRU (b)
        # NOTE(review): unclear whether the mask propagates past tf.reshape.
        b_forwardT1 = GRU(unitsB,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT1)
        b_backwardT1 = GRU(unitsB, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT1)
        b_forwardT2 = GRU(unitsB,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT2)
        b_backwardT2 = GRU(unitsB, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT2)
        b_forwardT3 = GRU(unitsB,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT3)
        b_backwardT3 = GRU(unitsB, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(a_gruT3)
        b_gruT1 = concatenate([b_forwardT1, b_backwardT1], axis=-1)
        b_gruT2 = concatenate([b_forwardT2, b_backwardT2], axis=-1)
        b_gruT3 = concatenate([b_forwardT3, b_backwardT3], axis=-1)
        # reshape to (batch, 1, 2*unitsB)
        b_gruT1 = tf.reshape(b_gruT1,(batch,1,unitsB*2))
        b_gruT2 = tf.reshape(b_gruT2,(batch,1,unitsB*2))
        b_gruT3 = tf.reshape(b_gruT3,(batch,1,unitsB*2))
        # third BGRU (c)
        c_forwardT1 = GRU(unitsC,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT1)
        c_backwardT1 = GRU(unitsC, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT1)
        c_forwardT2 = GRU(unitsC,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT2)
        c_backwardT2 = GRU(unitsC, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT2)
        c_forwardT3 = GRU(unitsC,activation='tanh',dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT3)
        c_backwardT3 = GRU(unitsC, activation='tanh', go_backwards=True,dropout=drop_rate,kernel_regularizer=kernel_regularizer)(b_gruT3)
        c_gruT1 = concatenate([c_forwardT1, c_backwardT1], axis=-1)
        c_gruT2 = concatenate([c_forwardT2, c_backwardT2], axis=-1)
        c_gruT3 = concatenate([c_forwardT3, c_backwardT3], axis=-1)
        # No reshape after stage (c): a fourth BGRU stage (d) that would have
        # needed it is disabled.
        #c_gruT1 = tf.reshape(c_gruT1,(batch,1,unitsC*2))
        #c_gruT2 = tf.reshape(c_gruT2,(batch,1,unitsC*2))
        #c_gruT3 = tf.reshape(c_gruT3,(batch,1,unitsC*2))
        # fourth BGRU (d) - disabled
        #d_forwardT1 = GRU(unitsC,activation='tanh')(c_gruT1)
        #d_backwardT1 = GRU(unitsC, activation='tanh', go_backwards=True)(c_gruT1)
        #d_forwardT2 = GRU(unitsC,activation='tanh')(c_gruT2)
        #d_backwardT2 = GRU(unitsC, activation='tanh', go_backwards=True)(c_gruT2)
        #d_forwardT3 = GRU(unitsC,activation='tanh')(c_gruT3)
        #d_backwardT3 = GRU(unitsC, activation='tanh', go_backwards=True)(c_gruT3)
        #d_gruT1 = concatenate([d_forwardT1, d_backwardT1], axis=-1)
        #d_gruT2 = concatenate([d_forwardT2, d_backwardT2], axis=-1)
        #d_gruT3 = concatenate([d_forwardT3, d_backwardT3], axis=-1)
        # concatenate the final BGRU outputs of the three timepoints
        bgru_total = concatenate([c_gruT1, c_gruT2, c_gruT3], axis=-1)
        # Fully connected layer (linear, 20 units) followed by dropout
        rnn_fc1 = Dense(units = 20, activation = 'linear', name = 'RNNfcFinal') (bgru_total)
        rnn_fc1 = Dropout(drop_rate) (rnn_fc1)
        return rnn_fc1
    return f