# ozan_rep_fun.py

import math
import statistics

import torch
import torch.autograd

from .ozan_min_norm_solvers import MinNormSolver


class OzanRepFunction(torch.autograd.Function):
    """Replicate the input n times in the forward pass; in the backward pass,
    combine the n per-copy gradients into their minimum-norm convex
    combination (MGDA-style) via MinNormSolver."""

    n = 5

    @staticmethod
    def forward(ctx, input):
        shape = input.shape
        ret = input.expand(OzanRepFunction.n, *shape)
        # .clone() is required: expand() only returns a view, and forgetting
        # to clone here was the cause of a previous error.
        return ret.clone()

    # Earlier, two-task-only version of backward, kept for reference.  For two
    # gradients the minimum-norm convex combination has a closed form:
    # a = clamp((theta0 - theta1) . theta0 / ||theta0 - theta1||^2, 0, 1) and
    # out = (1 - a) * grad_output[0] + a * grad_output[1].
    #
    # @staticmethod
    # def backward(ctx, grad_output):
    #     if grad_output.shape[0] == 2:
    #         theta0 = grad_output[0].view(-1).float()
    #         theta1 = grad_output[1].view(-1).float()
    #         diff = theta0 - theta1
    #         a = float((diff.dot(theta0) / (diff.dot(diff) + 1e-8)).clamp(0, 1))
    #         out = grad_output[0] * (1 - a) + grad_output[1] * a
    #     elif grad_output.shape[0] == 1:
    #         out = grad_output.clone().sum(dim=0)
    #     return out
    @staticmethod
    def backward(ctx, grad_output):
        num_grads = grad_output.shape[0]
        batch_size = grad_output.shape[1]
        if num_grads >= 2:
            # Find the convex combination of the per-task gradients with
            # minimum norm (the common descent direction); fall back to
            # uniform weights if the solver fails.
            try:
                alphas, score = MinNormSolver.find_min_norm_element(
                    [grad_output[i].view(batch_size, -1).float()
                     for i in range(num_grads)])
            except ValueError:
                alphas = [1.0 / num_grads for _ in range(num_grads)]
            # Rescale by sqrt(num_grads) so the combined gradient's magnitude
            # stays comparable to an unweighted sum.
            grad_outputs = [grad_output[i] * alphas[i] * math.sqrt(num_grads)
                            for i in range(num_grads)]
            output = grad_outputs[0]
            for i in range(1, num_grads):
                output += grad_outputs[i]
            return output
        elif num_grads == 1:
            # Single copy: just collapse the replication dimension.
            grad_input = grad_output.clone()
            return grad_input.sum(dim=0)
        else:
            # An empty grad_output is a usage error.
            raise ValueError("grad_output must contain at least one gradient")


ozan_rep_function = OzanRepFunction.apply
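
# A minimal usage sketch (an assumption, not part of the original file): the
# shared representation is replicated once per task and each task head reads
# its own copy, so each head's gradient arrives as a separate slice along
# dim 0 in backward.  `encoder` and `heads` are illustrative placeholders.
#
#     import torch.nn as nn
#
#     class MultiTaskNet(nn.Module):
#         def __init__(self, encoder, heads):
#             super().__init__()
#             self.encoder = encoder             # shared trunk
#             self.heads = nn.ModuleList(heads)  # one head per task
#
#         def forward(self, x):
#             OzanRepFunction.n = len(self.heads)
#             rep = ozan_rep_function(self.encoder(x))  # (n, batch, ...)
#             return [head(rep[i]) for i, head in enumerate(self.heads)]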


class TrevorRepFunction(torch.autograd.Function):
    """Identity in the forward pass; scales the gradient by 1/sqrt(n) in the
    backward pass to compensate for n task losses sharing one representation."""

    n = 5

    @staticmethod
    def forward(ctx, input):
        return input.clone()
    @staticmethod
    def backward(ctx, grad_output):
        # Divide the incoming gradient by sqrt(n) so that summing n task
        # losses does not inflate the shared gradient's magnitude.
        grad_input = grad_output.clone()
        mul = 1.0 / math.sqrt(TrevorRepFunction.n)
        return grad_input * mul


trevor_rep_function = TrevorRepFunction.apply
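
# Usage sketch (an assumption, not part of the original file):
# trevor_rep_function sits at the same point in the network as
# ozan_rep_function but does not replicate; its only effect is the 1/sqrt(n)
# gradient rescaling.  Variable names are illustrative.
#
#     TrevorRepFunction.n = num_tasks
#     rep = trevor_rep_function(encoder(x))  # identity in forward
#     loss = sum(task_losses)                # n losses share one rep
#     loss.backward()                        # shared grad scaled by 1/sqrt(n)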

# Global step counter used by GradNormRepFunction for periodic weight logging.
count = 0


class GradNormRepFunction(torch.autograd.Function):
    """Like OzanRepFunction, but combines the per-copy gradients using
    per-task weights that are updated online in the style of GradNorm."""

    n = 5
    inital_task_losses = None   # per-task losses at the start of training
    current_task_losses = None  # most recent per-task losses
    current_weights = None      # per-task gradient weights, updated in backward

    @staticmethod
    def forward(ctx, input):
        # Expand to n stacked copies, one per task head (see OzanRepFunction).
        shape = input.shape
        ret = input.expand(GradNormRepFunction.n, *shape)
        return ret.clone()
    @staticmethod
    def backward(ctx, grad_output):
        global count
        num_grads = grad_output.shape[0]
        grad_output = grad_output.float()
        if num_grads >= 2:
            # Weighted L2 norm of each task's gradient, and their mean.
            GiW = [torch.sqrt(grad_output[i].reshape(-1).dot(grad_output[i].reshape(-1))) *
                   GradNormRepFunction.current_weights[i] for i in range(num_grads)]
            GW_bar = torch.mean(torch.stack(GiW))
            try:
                # Inverse training rate: how much each task's loss has fallen
                # relative to its initial value, normalized by the mean ratio.
                Li_ratio = [c / max(i, 1e-7) for c, i in
                            zip(GradNormRepFunction.current_task_losses,
                                GradNormRepFunction.inital_task_losses)]
                mean_ratio = statistics.mean(Li_ratio)
                ri = [lir / max(mean_ratio, 1e-8) for lir in Li_ratio]
                # Target gradient norm per task: GW_bar * r_i ** alpha, alpha = 1.5.
                target_grad = [float(GW_bar * (max(r_i, 1e-8) ** 1.5)) for r_i in ri]
                target_weight = [float(target_grad[i] / float(GiW[i])) for i in range(num_grads)]
                total_weight = max(1e-7, sum(target_weight))
                target_weight = [w * num_grads / total_weight for w in target_weight]
                # Nudge each weight a small step toward its target instead of
                # solving the GradNorm objective exactly.
                for i in range(len(GradNormRepFunction.current_weights)):
                    wi = GradNormRepFunction.current_weights[i]
                    GradNormRepFunction.current_weights[i] += (
                        .0001 * wi if wi < target_weight[i] else -.0001 * wi)
                count += 1
                if count % 80 == 0:
                    with open("gradnorm_weights.txt", "a") as myfile:
                        myfile.write('target: ' + str(target_weight) + '\n')
                # Renormalize so the weights sum to num_grads.
                total_weight = max(1e-7, sum(GradNormRepFunction.current_weights))
                GradNormRepFunction.current_weights = [
                    w * num_grads / total_weight
                    for w in GradNormRepFunction.current_weights]
            except Exception:
                # Unset losses/weights or a zero norm: keep the previous weights.
                pass
            grad_outputs = [grad_output[i] * GradNormRepFunction.current_weights[i] *
                            (1 / math.sqrt(num_grads)) for i in range(num_grads)]
            output = grad_outputs[0]
            for i in range(1, num_grads):
                output += grad_outputs[i]
            return output.half()
        elif num_grads == 1:
            grad_input = grad_output.clone()
            return grad_input.sum(dim=0)
        else:
            # An empty grad_output is a usage error.
            raise ValueError("grad_output must contain at least one gradient")


gradnorm_rep_function = GradNormRepFunction.apply
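
# Usage sketch (an assumption, not part of the original file):
# GradNormRepFunction reads its state from class attributes, so the training
# loop must seed them before the first backward pass and keep the losses
# fresh.  Variable names are illustrative.
#
#     GradNormRepFunction.n = num_tasks
#     GradNormRepFunction.current_weights = [1.0] * num_tasks
#     GradNormRepFunction.inital_task_losses = losses_at_step_0     # L_i(0)
#     ...
#     GradNormRepFunction.current_task_losses = most_recent_losses  # L_i(t)
#     rep = gradnorm_rep_function(encoder(x))  # (n, batch, ...) copies
#     sum(task_losses).backward()              # weights updated inside backward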