123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- # -*- coding:utf-8 -*-
- """
- Author: BigCat
- """
- import time
- import json
- import argparse
- import numpy as np
- import pandas as pd
- from config import *
- from modeling import LstmWithCRFModel, SignalLstmModel, tf
- from loguru import logger
# Command-line interface: a single option selecting which data set to train on.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--name",
    type=str,
    default="ssq",
    help="选择训练数据: 双色球/大乐透",
)
args = parser.parse_args()

# Filled in by the training functions with the names of the prediction
# tensors, then dumped to JSON by run().
pred_key = {}
def create_train_data(name, windows):
    """Build sliding-window training samples for the red and blue ball models.

    The CSV for ``name`` is loaded, its first two columns are dropped, and
    every run of ``windows + 1`` consecutive rows becomes one sample: the
    first row of the run is the label and the following ``windows`` rows are
    the features.

    :param name: lottery type key; "ssq" uses the first 6 columns as red
        balls, any other value uses the first 5
    :param windows: number of rows used as features for one sample
    :return: dict with "red" and "blue" entries, each holding "x_data" and
        "y_data" numpy arrays
    :raises ValueError: if the CSV is empty or has too few rows to form a
        single sample
    """
    data = pd.read_csv("{}{}".format(name_path[name]["path"], data_file_name))
    if not len(data):
        # logger.error() returns None, so it cannot be raised directly:
        # log first, then raise a real exception.
        message = " 请执行 get_data.py 进行数据下载!"
        logger.error(message)
        raise ValueError(message)

    # Create the model folder (no-op when it already exists).
    os.makedirs(model_path, exist_ok=True)
    logger.info("训练数据已加载! ")

    data = data.iloc[:, 2:].values
    logger.info("训练集数据维度: {}".format(data.shape))

    # Row i is a label and rows i+1 .. i+windows are its features, so the
    # last usable label index is len(data) - windows - 1.
    sample_count = len(data) - windows
    if sample_count <= 0:
        message = "训练数据不足: 共 {} 行, 窗口大小为 {}, 无法构造训练样本!".format(
            len(data), windows
        )
        logger.error(message)
        raise ValueError(message)

    x_data = np.array([data[i + 1:i + windows + 1] for i in range(sample_count)])
    y_data = np.array([data[i] for i in range(sample_count)])

    # Columns before cut_num are red balls, the rest are blue balls.
    cut_num = 6 if name == "ssq" else 5
    return {
        "red": {
            "x_data": x_data[:, :, :cut_num],
            "y_data": y_data[:, :cut_num],
        },
        "blue": {
            "x_data": x_data[:, :, cut_num:],
            "y_data": y_data[:, cut_num:],
        },
    }
def train_red_ball_model(name, x_data, y_data):
    """Train the red-ball LSTM+CRF model and save its checkpoint.

    Inputs and labels are shifted by -1 before training. Samples are fed to
    the graph one at a time. After training, the name of the prediction
    tensor is stored in the module-level ``pred_key`` and the session is
    saved under the configured red-model path.

    :param name: lottery type key used to look up ``model_args``
    :param x_data: training features
    :param y_data: training labels
    :return: None
    """
    m_args = model_args[name]
    net_cfg = m_args["model_args"]

    # Ball numbers in the data start at 1; the model works on 0-based ids.
    x_data = x_data - 1
    y_data = y_data - 1
    data_len = x_data.shape[0]
    logger.info("特征数据维度: {}".format(x_data.shape))
    logger.info("标签数据维度: {}".format(y_data.shape))

    with tf.compat.v1.Session() as sess:
        # "ssq" stores the red sequence length under a different config key.
        if name == "ssq":
            seq_len = net_cfg["sequence_len"]
        else:
            seq_len = net_cfg["red_sequence_len"]

        red_ball_model = LstmWithCRFModel(
            batch_size=net_cfg["batch_size"],
            n_class=net_cfg["red_n_class"],
            ball_num=seq_len,
            w_size=net_cfg["windows_size"],
            embedding_size=net_cfg["red_embedding_size"],
            words_size=net_cfg["red_n_class"],
            hidden_size=net_cfg["red_hidden_size"],
            layer_size=net_cfg["red_layer_size"],
        )

        opt_cfg = m_args["train_args"]
        optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=opt_cfg["red_learning_rate"],
            beta1=opt_cfg["red_beta1"],
            beta2=opt_cfg["red_beta2"],
            epsilon=opt_cfg["red_epsilon"],
            use_locking=False,
            name='Adam',
        )
        train_step = optimizer.minimize(red_ball_model.loss)
        sess.run(tf.compat.v1.global_variables_initializer())

        # Every step feeds one sample, so the length vector never changes.
        seq_len_feed = np.array([seq_len])
        fetches = [train_step, red_ball_model.loss, red_ball_model.pred_sequence]

        for epoch in range(net_cfg["red_epochs"]):
            for step in range(data_len):
                x_batch = x_data[step:step + 1, :, :]
                y_batch = y_data[step:step + 1, :]
                _, loss_, pred = sess.run(fetches, feed_dict={
                    "inputs:0": x_batch,
                    "tag_indices:0": y_batch,
                    "sequence_length:0": seq_len_feed,
                })
                if step % 100 == 0:
                    # +1 converts the 0-based ids back to ball numbers.
                    logger.info("epoch: {}, loss: {}, tag: {}, pred: {}".format(
                        epoch, loss_, y_batch[0] + 1, pred[0] + 1)
                    )

        # Record the prediction tensor name for later lookup.
        pred_key[ball_name[0][0]] = red_ball_model.pred_sequence.name

        red_dir = m_args["path"]["red"]
        if not os.path.exists(red_dir):
            os.makedirs(red_dir)
        saver = tf.compat.v1.train.Saver()
        saver.save(sess, "{}{}.{}".format(red_dir, red_ball_model_name, extension))
def train_blue_ball_model(name, x_data, y_data):
    """Train the blue-ball model and save its checkpoint.

    For "ssq" the features are reshaped to ``(samples, windows_size)``, the
    labels are one-hot encoded, and a ``SignalLstmModel`` is trained. For any
    other ``name`` a ``LstmWithCRFModel`` is trained on label sequences.
    In both cases inputs and labels are shifted by -1 so they are zero-based.
    After training, the prediction tensor name is stored in the module-level
    ``pred_key`` and the session is saved under the configured blue-model path.

    :param name: lottery type key used to look up ``model_args``
    :param x_data: training features
    :param y_data: training labels
    :return: None
    """
    m_args = model_args[name]
    net_cfg = m_args["model_args"]
    is_ssq = name == "ssq"

    x_data = x_data - 1
    data_len = x_data.shape[0]
    if is_ssq:
        x_data = x_data.reshape(len(x_data), net_cfg["windows_size"])
        y_data = tf.keras.utils.to_categorical(y_data - 1, num_classes=net_cfg["blue_n_class"])
    else:
        # Labels must be zero-based like the inputs; the progress log below
        # adds 1 back to print real ball numbers.
        y_data = y_data - 1
    logger.info("特征数据维度: {}".format(x_data.shape))
    logger.info("标签数据维度: {}".format(y_data.shape))

    with tf.compat.v1.Session() as sess:
        if is_ssq:
            blue_ball_model = SignalLstmModel(
                batch_size=net_cfg["batch_size"],
                n_class=net_cfg["blue_n_class"],
                w_size=net_cfg["windows_size"],
                embedding_size=net_cfg["blue_embedding_size"],
                hidden_size=net_cfg["blue_hidden_size"],
                outputs_size=net_cfg["blue_n_class"],
                layer_size=net_cfg["blue_layer_size"],
            )
        else:
            blue_ball_model = LstmWithCRFModel(
                batch_size=net_cfg["batch_size"],
                n_class=net_cfg["blue_n_class"],
                ball_num=net_cfg["blue_sequence_len"],
                w_size=net_cfg["windows_size"],
                embedding_size=net_cfg["blue_embedding_size"],
                words_size=net_cfg["blue_n_class"],
                hidden_size=net_cfg["blue_hidden_size"],
                layer_size=net_cfg["blue_layer_size"],
            )

        train_step = tf.compat.v1.train.AdamOptimizer(
            learning_rate=m_args["train_args"]["blue_learning_rate"],
            beta1=m_args["train_args"]["blue_beta1"],
            beta2=m_args["train_args"]["blue_beta2"],
            epsilon=m_args["train_args"]["blue_epsilon"],
            use_locking=False,
            name='Adam'
        ).minimize(blue_ball_model.loss)
        sess.run(tf.compat.v1.global_variables_initializer())

        # The two model classes expose their prediction under different names.
        if is_ssq:
            pred_op = blue_ball_model.pred_label
        else:
            pred_op = blue_ball_model.pred_sequence
        fetches = [train_step, blue_ball_model.loss, pred_op]

        for epoch in range(net_cfg["blue_epochs"]):
            for i in range(data_len):
                x_batch = x_data[i:i + 1]
                y_batch = y_data[i:i + 1]
                feed_dict = {"inputs:0": x_batch, "tag_indices:0": y_batch}
                if not is_ssq:
                    # Only the CRF model takes a sequence length; one sample per step.
                    feed_dict["sequence_length:0"] = np.array([net_cfg["blue_sequence_len"]])
                _, loss_, pred = sess.run(fetches, feed_dict=feed_dict)
                if i % 100 == 0:
                    # One-hot labels are decoded with argmax; +1 restores ball numbers.
                    tag = np.argmax(y_batch[0]) + 1 if is_ssq else y_batch[0] + 1
                    logger.info("epoch: {}, loss: {}, tag: {}, pred: {}".format(
                        epoch, loss_, tag, pred[0] + 1)
                    )

        # Record the prediction tensor name for later lookup.
        pred_key[ball_name[1][0]] = pred_op.name

        # makedirs also creates missing parent folders, so this function
        # works even if the red model has not been trained first.
        os.makedirs(m_args["path"]["blue"], exist_ok=True)
        saver = tf.compat.v1.train.Saver()
        saver.save(sess, "{}{}.{}".format(m_args["path"]["blue"], blue_ball_model_name, extension))
def run(name):
    """Run the full training pipeline for one lottery type.

    Builds the data set, trains the red-ball model, resets the default
    TensorFlow graph, trains the blue-ball model, and finally writes the
    collected prediction tensor names (``pred_key``) to a JSON file under
    the model directory.

    :param name: lottery type key used for both data loading and model config
    :return: None
    """
    display_name = name_path[name]["name"]

    logger.info("正在创建【{}】数据集...".format(display_name))
    # Use the function argument, not the global CLI value, so the data set
    # always matches the model configuration selected by `name`.
    train_data = create_train_data(name, model_args[name]["model_args"]["windows_size"])

    logger.info("开始训练【{}】红球模型...".format(display_name))
    start_time = time.time()
    train_red_ball_model(name, x_data=train_data["red"]["x_data"], y_data=train_data["red"]["y_data"])
    logger.info("训练耗时: {}".format(time.time() - start_time))

    # Reset the graph so the blue model does not collide with the red model's
    # variables and tensor names.
    tf.compat.v1.reset_default_graph()

    logger.info("开始训练【{}】蓝球模型...".format(display_name))
    start_time = time.time()
    train_blue_ball_model(name, x_data=train_data["blue"]["x_data"], y_data=train_data["blue"]["y_data"])
    logger.info("训练耗时: {}".format(time.time() - start_time))

    # Save the names of the prediction nodes.
    with open("{}/{}/{}".format(model_path, name, pred_key_name), "w") as f:
        json.dump(pred_key, f)
if __name__ == '__main__':
    # Script entry point: train only when a lottery name was supplied.
    if args.name:
        run(args.name)
    else:
        raise Exception("玩法名称不能为空!")
|