run_train_model.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. # -*- coding:utf-8 -*-
  2. """
  3. Author: BigCat
  4. """
  5. import time
  6. import json
  7. import argparse
  8. import numpy as np
  9. import pandas as pd
  10. from config import *
  11. from modeling import LstmWithCRFModel, SignalLstmModel, tf
  12. from loguru import logger
  13. parser = argparse.ArgumentParser()
  14. parser.add_argument('--name', default="ssq", type=str, help="选择训练数据: 双色球/大乐透")
  15. args = parser.parse_args()
  16. pred_key = {}
  17. def create_train_data(name, windows):
  18. """ 创建训练数据
  19. :param name: 玩法,双色球/大乐透
  20. :param windows: 训练窗口
  21. :return:
  22. """
  23. data = pd.read_csv("{}{}".format(name_path[name]["path"], data_file_name))
  24. if not len(data):
  25. raise logger.error(" 请执行 get_data.py 进行数据下载!")
  26. else:
  27. # 创建模型文件夹
  28. if not os.path.exists(model_path):
  29. os.mkdir(model_path)
  30. logger.info("训练数据已加载! ")
  31. data = data.iloc[:, 2:].values
  32. logger.info("训练集数据维度: {}".format(data.shape))
  33. x_data, y_data = [], []
  34. for i in range(len(data) - windows - 1):
  35. sub_data = data[i:(i+windows+1), :]
  36. x_data.append(sub_data[1:])
  37. y_data.append(sub_data[0])
  38. cut_num = 6 if name == "ssq" else 5
  39. return {
  40. "red": {
  41. "x_data": np.array(x_data)[:, :, :cut_num], "y_data": np.array(y_data)[:, :cut_num]
  42. },
  43. "blue": {
  44. "x_data": np.array(x_data)[:, :, cut_num:], "y_data": np.array(y_data)[:, cut_num:]
  45. }
  46. }
  47. def train_red_ball_model(name, x_data, y_data):
  48. """ 红球模型训练
  49. :param name: 玩法
  50. :param x_data: 训练样本
  51. :param y_data: 训练标签
  52. :return:
  53. """
  54. m_args = model_args[name]
  55. x_data = x_data - 1
  56. y_data = y_data - 1
  57. data_len = x_data.shape[0]
  58. logger.info("特征数据维度: {}".format(x_data.shape))
  59. logger.info("标签数据维度: {}".format(y_data.shape))
  60. with tf.compat.v1.Session() as sess:
  61. red_ball_model = LstmWithCRFModel(
  62. batch_size=m_args["model_args"]["batch_size"],
  63. n_class=m_args["model_args"]["red_n_class"],
  64. ball_num=m_args["model_args"]["sequence_len"] if name == "ssq" else m_args["model_args"]["red_sequence_len"],
  65. w_size=m_args["model_args"]["windows_size"],
  66. embedding_size=m_args["model_args"]["red_embedding_size"],
  67. words_size=m_args["model_args"]["red_n_class"],
  68. hidden_size=m_args["model_args"]["red_hidden_size"],
  69. layer_size=m_args["model_args"]["red_layer_size"]
  70. )
  71. train_step = tf.compat.v1.train.AdamOptimizer(
  72. learning_rate=m_args["train_args"]["red_learning_rate"],
  73. beta1=m_args["train_args"]["red_beta1"],
  74. beta2=m_args["train_args"]["red_beta2"],
  75. epsilon=m_args["train_args"]["red_epsilon"],
  76. use_locking=False,
  77. name='Adam'
  78. ).minimize(red_ball_model.loss)
  79. sess.run(tf.compat.v1.global_variables_initializer())
  80. for epoch in range(m_args["model_args"]["red_epochs"]):
  81. for i in range(data_len):
  82. _, loss_, pred = sess.run([
  83. train_step, red_ball_model.loss, red_ball_model.pred_sequence
  84. ], feed_dict={
  85. "inputs:0": x_data[i:(i+1), :, :],
  86. "tag_indices:0": y_data[i:(i+1), :],
  87. "sequence_length:0": np.array([m_args["model_args"]["sequence_len"]]*1) \
  88. if name == "ssq" else np.array([m_args["model_args"]["red_sequence_len"]]*1)
  89. })
  90. if i % 100 == 0:
  91. logger.info("epoch: {}, loss: {}, tag: {}, pred: {}".format(
  92. epoch, loss_, y_data[i:(i+1), :][0] + 1, pred[0] + 1)
  93. )
  94. pred_key[ball_name[0][0]] = red_ball_model.pred_sequence.name
  95. if not os.path.exists(m_args["path"]["red"]):
  96. os.makedirs(m_args["path"]["red"])
  97. saver = tf.compat.v1.train.Saver()
  98. saver.save(sess, "{}{}.{}".format(m_args["path"]["red"], red_ball_model_name, extension))
  99. def train_blue_ball_model(name, x_data, y_data):
  100. """ 蓝球模型训练
  101. :param name: 玩法
  102. :param x_data: 训练样本
  103. :param y_data: 训练标签
  104. :return:
  105. """
  106. m_args = model_args[name]
  107. x_data = x_data - 1
  108. data_len = x_data.shape[0]
  109. if name == "ssq":
  110. x_data = x_data.reshape(len(x_data), m_args["model_args"]["windows_size"])
  111. y_data = tf.keras.utils.to_categorical(y_data - 1, num_classes=m_args["model_args"]["blue_n_class"])
  112. logger.info("特征数据维度: {}".format(x_data.shape))
  113. logger.info("标签数据维度: {}".format(y_data.shape))
  114. with tf.compat.v1.Session() as sess:
  115. if name == "ssq":
  116. blue_ball_model = SignalLstmModel(
  117. batch_size=m_args["model_args"]["batch_size"],
  118. n_class=m_args["model_args"]["blue_n_class"],
  119. w_size=m_args["model_args"]["windows_size"],
  120. embedding_size=m_args["model_args"]["blue_embedding_size"],
  121. hidden_size=m_args["model_args"]["blue_hidden_size"],
  122. outputs_size=m_args["model_args"]["blue_n_class"],
  123. layer_size=m_args["model_args"]["blue_layer_size"]
  124. )
  125. else:
  126. blue_ball_model = LstmWithCRFModel(
  127. batch_size=m_args["model_args"]["batch_size"],
  128. n_class=m_args["model_args"]["blue_n_class"],
  129. ball_num=m_args["model_args"]["blue_sequence_len"],
  130. w_size=m_args["model_args"]["windows_size"],
  131. embedding_size=m_args["model_args"]["blue_embedding_size"],
  132. words_size=m_args["model_args"]["blue_n_class"],
  133. hidden_size=m_args["model_args"]["blue_hidden_size"],
  134. layer_size=m_args["model_args"]["blue_layer_size"]
  135. )
  136. train_step = tf.compat.v1.train.AdamOptimizer(
  137. learning_rate=m_args["train_args"]["blue_learning_rate"],
  138. beta1=m_args["train_args"]["blue_beta1"],
  139. beta2=m_args["train_args"]["blue_beta2"],
  140. epsilon=m_args["train_args"]["blue_epsilon"],
  141. use_locking=False,
  142. name='Adam'
  143. ).minimize(blue_ball_model.loss)
  144. sess.run(tf.compat.v1.global_variables_initializer())
  145. for epoch in range(m_args["model_args"]["blue_epochs"]):
  146. for i in range(data_len):
  147. if name == "ssq":
  148. _, loss_, pred = sess.run([
  149. train_step, blue_ball_model.loss, blue_ball_model.pred_label
  150. ], feed_dict={
  151. "inputs:0": x_data[i:(i+1), :],
  152. "tag_indices:0": y_data[i:(i+1), :],
  153. })
  154. if i % 100 == 0:
  155. logger.info("epoch: {}, loss: {}, tag: {}, pred: {}".format(
  156. epoch, loss_, np.argmax(y_data[i:(i+1), :][0]) + 1, pred[0] + 1)
  157. )
  158. else:
  159. _, loss_, pred = sess.run([
  160. train_step, blue_ball_model.loss, blue_ball_model.pred_sequence
  161. ], feed_dict={
  162. "inputs:0": x_data[i:(i + 1), :, :],
  163. "tag_indices:0": y_data[i:(i + 1), :],
  164. "sequence_length:0": np.array([m_args["model_args"]["blue_sequence_len"]] * 1)
  165. })
  166. if i % 100 == 0:
  167. logger.info("epoch: {}, loss: {}, tag: {}, pred: {}".format(
  168. epoch, loss_, y_data[i:(i + 1), :][0] + 1, pred[0] + 1)
  169. )
  170. pred_key[ball_name[1][0]] = blue_ball_model.pred_label.name if name == "ssq" else blue_ball_model.pred_sequence.name
  171. if not os.path.exists(m_args["path"]["blue"]):
  172. os.mkdir(m_args["path"]["blue"])
  173. saver = tf.compat.v1.train.Saver()
  174. saver.save(sess, "{}{}.{}".format(m_args["path"]["blue"], blue_ball_model_name, extension))
  175. def run(name):
  176. """ 执行训练
  177. :param name: 玩法
  178. :return:
  179. """
  180. logger.info("正在创建【{}】数据集...".format(name_path[name]["name"]))
  181. train_data = create_train_data(args.name, model_args[name]["model_args"]["windows_size"])
  182. logger.info("开始训练【{}】红球模型...".format(name_path[name]["name"]))
  183. start_time = time.time()
  184. train_red_ball_model(name, x_data=train_data["red"]["x_data"], y_data=train_data["red"]["y_data"])
  185. logger.info("训练耗时: {}".format(time.time() - start_time))
  186. tf.compat.v1.reset_default_graph() # 重置网络图
  187. logger.info("开始训练【{}】蓝球模型...".format(name_path[name]["name"]))
  188. start_time = time.time()
  189. train_blue_ball_model(name, x_data=train_data["blue"]["x_data"], y_data=train_data["blue"]["y_data"])
  190. logger.info("训练耗时: {}".format(time.time() - start_time))
  191. # 保存预测关键结点名
  192. with open("{}/{}/{}".format(model_path, name, pred_key_name), "w") as f:
  193. json.dump(pred_key, f)
  194. if __name__ == '__main__':
  195. if not args.name:
  196. raise Exception("玩法名称不能为空!")
  197. else:
  198. run(args.name)