AUC.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. import numpy as np
  2. import random
  3. import time
  4. def Calculation_AUC(MatrixAdjacency_Train, MatrixAdjacency_Test, Matrix_similarity, MaxNodeNum):
  5. AUC_TimeStart = time.clock()
  6. print
  7. ' Calculation AUC......'
  8. AUCnum = 672400
  9. Matrix_similarity = np.triu(Matrix_similarity - Matrix_similarity * MatrixAdjacency_Train)
  10. Matrix_NoExist = np.ones(MaxNodeNum) - MatrixAdjacency_Train - MatrixAdjacency_Test - np.eye(MaxNodeNum)
  11. Test = np.triu(MatrixAdjacency_Test)
  12. NoExist = np.triu(Matrix_NoExist)
  13. # Test_num =len(np.argwhere(Test == 1))
  14. # NoExist_num = len(np.argwhere(NoExist == 1))
  15. # # # Test_num = np.nonzero(Test)[0].shape[0]
  16. # # # NoExist_num = np.nonzero(NoExist)[0].shape[0]
  17. Test_num = len(np.argwhere(Test == 1))
  18. NoExist_num = len(np.argwhere(NoExist == 1))
  19. # print ' Test_num:%d'%Test_num
  20. # print ' NoExist_num:%d'%NoExist_num
  21. Test_rd = [int(x) for index, x in enumerate((Test_num * np.random.rand(1, AUCnum))[0])]
  22. NoExist_rd = [int(x) for index, x in enumerate((NoExist_num * np.random.rand(1, AUCnum))[0])]
  23. # print ' Test_rd:'+str(Test_rd)
  24. # print ' Test_rd长度:'+str(len(Test_rd))
  25. # print ' Test_rd最大值:'+str(max(Test_rd))
  26. # print ' NoExist_rd:'+str(NoExist_rd)
  27. # print ' NoExist_rd长度:'+str(len(NoExist_rd))
  28. TestPre = Matrix_similarity * Test
  29. NoExistPre = Matrix_similarity * NoExist
  30. TestIndex = np.argwhere(Test == 1)
  31. Test_Data = np.array([TestPre[x[0], x[1]] for index, x in enumerate(TestIndex)]).T
  32. NoExistIndex = np.argwhere(NoExist == 1)
  33. NoExist_Data = np.array([NoExistPre[x[0], x[1]] for index, x in enumerate(NoExistIndex)]).T
  34. # print Test_Data
  35. # print Test_Data.shape
  36. # print NoExist_Data
  37. # print NoExist_Data.shape
  38. Test_rd = np.array([Test_Data[x] for index, x in enumerate(Test_rd)])
  39. NoExist_rd = np.array([NoExist_Data[x] for index, x in enumerate(NoExist_rd)])
  40. # print Test_rd
  41. # print Test_rd.shape
  42. # print NoExist_rd
  43. # print NoExist_rd.shape
  44. # aucArray = Test_rd - NoExist_rd
  45. # n1 = len(np.argwhere(aucArray > 0))
  46. # n2 = len(np.argwhere(aucArray == 0))
  47. n1, n2 = 0, 0
  48. for num in range(AUCnum):
  49. if Test_rd[num] > NoExist_rd[num]:
  50. n1 += 1
  51. elif Test_rd[num] == NoExist_rd[num]:
  52. n2 += 0.5
  53. else:
  54. n1 += 0
  55. auc = float(n1 + n2) / AUCnum
  56. print(' AUC指标为:%f' % auc)
  57. AUC_TimeEnd = time.clock()
  58. print(' AUCTime:%f s' % (AUC_TimeEnd - AUC_TimeStart))
  59. return auc
  60. # Randomly pick entries to compare, then compute the score
  61. def calculate_score(N, train, text, score, n):
  62. total_score = 0.0
  63. for i in range(n):
  64. while True:
  65. random_row = random.randint(0, N-1)
  66. random_col = random.randint(0, N-1)
  67. if train[random_row][random_col] == 0:
  68. rand_index_train = [random_row, random_col]
  69. break
  70. while True:
  71. random_row = random.randint(0, N-1)
  72. random_col = random.randint(0, N-1)
  73. if text[random_row][random_col] == 1:
  74. rand_index_text = [random_row, random_col]
  75. break
  76. # 计算得分
  77. ##print("text_score:", score[rand_index_text[0]][rand_index_text[1]])
  78. ##print("train_score:", score[rand_index_train[0]][rand_index_train[1]])
  79. if score[rand_index_text[0]][rand_index_text[1]] > score[rand_index_train[0]][rand_index_train[1]]:
  80. total_score += 1.0
  81. elif score[rand_index_text[0]][rand_index_text[1]] == score[rand_index_train[0]][rand_index_train[1]]:
  82. total_score += 0.5
  83. return total_score