{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "10291b0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import sklearn\n",
    "import pandas as pd\n",
    "import os\n",
    "import sys\n",
    "import time\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras.models import  Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2e43f504",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.10.0\n",
      "sys.version_info(major=3, minor=9, micro=15, releaselevel='final', serial=0)\n",
      "matplotlib 3.6.2\n",
      "numpy 1.23.4\n",
      "pandas 1.5.1\n",
      "sklearn 1.1.3\n",
      "tensorflow 2.10.0\n",
      "keras.api._v2.keras 2.10.0\n"
     ]
    }
   ],
   "source": [
    "# 查看库信息\n",
    "print(tf.__version__)\n",
    "print(sys.version_info)\n",
    "for module in mpl, np, pd, sklearn, tf, keras:\n",
    "    print(module.__name__, module.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8a0e2b33",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 一,处理数据\n",
    "# 1,加载训练数据\n",
    "# https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt\n",
    "input_filepath = \"./shakespeare.txt\"\n",
    "text = open(input_filepath, 'r').read()\n",
    "# 简单输出查看数据\n",
    "# print(len(text))\n",
    "# print(text[0:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "bdff13a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2,生成词库\n",
    "vocab = sorted(set(text))\n",
    "# 查看生成的词库\n",
    "# print(len(vocab))\n",
    "# print(vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "17aee416",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3,生成由 字符->数字 的字典\n",
    "char2idx = {char:idx for idx, char in enumerate(vocab)}\n",
    "# 查看字典\n",
    "# print(char2idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ffdf1013",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4,将词库转换为numpy中的array\n",
    "idx2char = np.array(vocab)\n",
    "# print(idx2char)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6ceb6262",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5,将文本转化为数字\n",
    "text_as_int = np.array([char2idx[c] for c in text])\n",
    "# print(text_as_int[0:10])\n",
    "# print(text[0:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8d8dbedd",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# 6,将数据加载到dataset中,并处理数据\n",
    "\n",
    "# 将array转换为tensor格式的数据\n",
    "char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)\n",
    "\n",
    "# 使用tensor的方法将数据集每个100个字符进行batch分序列\n",
    "seq_length = 100\n",
    "seq_dataset = char_dataset.batch(seq_length + 1, drop_remainder = True)\n",
    "# 选择数据查看\n",
    "# for ch_id in char_dataset.take(2):\n",
    "#     print(ch_id, idx2char[ch_id.numpy()])\n",
    "\n",
    "# for seq_id in seq_dataset.take(2):\n",
    "#     print(seq_id)\n",
    "#     print(repr(''.join(idx2char[seq_id.numpy()])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2c02cfa1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7,将数据分割成输入和输出两个部分\n",
    "def split_input_target(id_text):\n",
    "    # 将序列作如下变化 abcde -> abcd, bcde\n",
    "    return id_text[0:-1], id_text[1:]\n",
    "seq_dataset = seq_dataset.map(split_input_target)\n",
    "# 简单查看结果\n",
    "# for item_input, item_output in seq_dataset.take(2):\n",
    "#     print(item_input.numpy())\n",
    "#     print(item_output.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ebb8f89c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 8,打乱数据,batch分组,batch_size=64\n",
    "batch_size = 64\n",
    "buffer_size = 10000\n",
    "\n",
    "seq_dataset = seq_dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5b91648e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 二,构建模型\n",
    "# 1,定义模型常量\n",
    "vocab_size = len(vocab)\n",
    "embedding_dim = 256\n",
    "rnn_units = 1024"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "af8c56b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(64, None)\n",
      "(64, None, 256)\n",
      "(64, None, 1024)\n",
      "(64, None, 65)\n",
      "Model: \"model\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " input_1 (InputLayer)        [(64, None)]              0         \n",
      "                                                                 \n",
      " embedding (Embedding)       (64, None, 256)           16640     \n",
      "                                                                 \n",
      " lstm (LSTM)                 (64, None, 1024)          5246976   \n",
      "                                                                 \n",
      " dense (Dense)               (64, None, 65)            66625     \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 5,330,241\n",
      "Trainable params: 5,330,241\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# 2,定义model模型\n",
    "inputs = keras.Input(batch_input_shape=(batch_size,None))\n",
    "print(inputs.shape)\n",
    "outputs = keras.layers.Embedding(vocab_size, embedding_dim)(inputs)\n",
    "print(outputs.shape)\n",
    "outputs = keras.layers.LSTM(units = rnn_units,stateful = True,recurrent_initializer='glorot_uniform',return_sequences = True)(outputs)\n",
    "print(outputs.shape)\n",
    "outputs = keras.layers.Dense(vocab_size)(outputs)\n",
    "print(outputs.shape)\n",
    "\n",
    "model = Model(inputs, outputs)\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0392a946",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(64, 100, 65)\n",
      "tf.Tensor(\n",
      "[[41]\n",
      " [46]\n",
      " [27]\n",
      " [41]\n",
      " [12]\n",
      " [26]\n",
      " [48]\n",
      " [ 0]\n",
      " [12]\n",
      " [33]\n",
      " [29]\n",
      " [50]\n",
      " [32]\n",
      " [18]\n",
      " [ 7]\n",
      " [28]\n",
      " [64]\n",
      " [25]\n",
      " [64]\n",
      " [ 8]\n",
      " [40]\n",
      " [21]\n",
      " [ 7]\n",
      " [34]\n",
      " [58]\n",
      " [37]\n",
      " [23]\n",
      " [11]\n",
      " [56]\n",
      " [ 3]\n",
      " [41]\n",
      " [42]\n",
      " [33]\n",
      " [ 9]\n",
      " [ 0]\n",
      " [41]\n",
      " [ 4]\n",
      " [62]\n",
      " [38]\n",
      " [58]\n",
      " [46]\n",
      " [64]\n",
      " [36]\n",
      " [15]\n",
      " [33]\n",
      " [14]\n",
      " [52]\n",
      " [53]\n",
      " [26]\n",
      " [12]\n",
      " [ 0]\n",
      " [28]\n",
      " [34]\n",
      " [39]\n",
      " [50]\n",
      " [51]\n",
      " [62]\n",
      " [14]\n",
      " [19]\n",
      " [16]\n",
      " [29]\n",
      " [ 7]\n",
      " [41]\n",
      " [10]\n",
      " [31]\n",
      " [52]\n",
      " [48]\n",
      " [21]\n",
      " [61]\n",
      " [23]\n",
      " [28]\n",
      " [26]\n",
      " [38]\n",
      " [ 6]\n",
      " [64]\n",
      " [62]\n",
      " [16]\n",
      " [10]\n",
      " [36]\n",
      " [27]\n",
      " [27]\n",
      " [64]\n",
      " [15]\n",
      " [64]\n",
      " [58]\n",
      " [35]\n",
      " [64]\n",
      " [ 9]\n",
      " [59]\n",
      " [25]\n",
      " [ 5]\n",
      " [11]\n",
      " [24]\n",
      " [42]\n",
      " [31]\n",
      " [50]\n",
      " [13]\n",
      " [30]\n",
      " [31]\n",
      " [51]], shape=(100, 1), dtype=int64)\n",
      "tf.Tensor(\n",
      "[41 46 27 41 12 26 48  0 12 33 29 50 32 18  7 28 64 25 64  8 40 21  7 34\n",
      " 58 37 23 11 56  3 41 42 33  9  0 41  4 62 38 58 46 64 36 15 33 14 52 53\n",
      " 26 12  0 28 34 39 50 51 62 14 19 16 29  7 41 10 31 52 48 21 61 23 28 26\n",
      " 38  6 64 62 16 10 36 27 27 64 15 64 58 35 64  9 59 25  5 11 24 42 31 50\n",
      " 13 30 31 51], shape=(100,), dtype=int64)\n",
      "Input:  \"?\\nWhere are your mess of sons to back you now?\\nThe wanton Edward, and the lusty George?\\nAnd where's \"\n",
      "\n",
      "Output:  \"\\nWhere are your mess of sons to back you now?\\nThe wanton Edward, and the lusty George?\\nAnd where's t\"\n",
      "\n",
      "Predictions:  \"chOc?Nj\\n?UQlTF-PzMz.bI-VtYK;r$cdU3\\nc&xZthzXCUBnoN?\\nPValmxBGDQ-c:SnjIwKPNZ,zxD:XOOzCztWz3uM';LdSlARSm\"\n"
     ]
    }
   ],
   "source": [
    "# 3,单个例子测试模型\n",
    "for input_example_batch, target_example_batch in seq_dataset.take(1):\n",
    "    example_batch_predictions = model(input_example_batch)\n",
    "    print(example_batch_predictions.shape)\n",
    "\n",
    "# random sampling.\n",
    "# greedy, random.\n",
    "# 测试单个例子的结果\n",
    "sample_indices = tf.random.categorical(logits = example_batch_predictions[0], num_samples = 1)\n",
    "print(sample_indices)\n",
    "# (100, 65) -> (100, 1)\n",
    "sample_indices = tf.squeeze(sample_indices, axis = -1)\n",
    "print(sample_indices)\n",
    "\n",
    "\n",
    "# 打印输入,目标,预测的结果\n",
    "print(\"Input: \", repr(\"\".join(idx2char[input_example_batch[0]])))\n",
    "print()\n",
    "print(\"Output: \", repr(\"\".join(idx2char[target_example_batch[0]])))\n",
    "print()\n",
    "print(\"Predictions: \", repr(\"\".join(idx2char[sample_indices])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "2f4fbdfa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(64, 100)\n",
      "4.1738906\n"
     ]
    }
   ],
   "source": [
    "# 三,定义损失函数和优化器\n",
    "def loss(labels, logits):\n",
    "    return keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)\n",
    "\n",
    "# 定义优化器和自定义损失函数\n",
    "model.compile(optimizer = 'adam', loss = loss)\n",
    "\n",
    "# 测试计算单例的损失数\n",
    "example_loss = loss(target_example_batch, example_batch_predictions)\n",
    "print(example_loss.shape)\n",
    "print(example_loss.numpy().mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "485a24b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 四,callback模块-checkpoints\n",
    "output_dir = \"./text_generation_lstm3_checkpoints\"\n",
    "if not os.path.exists(output_dir):\n",
    "    os.mkdir(output_dir)\n",
    "checkpoint_prefix = os.path.join(output_dir, 'ckpt_{epoch}')\n",
    "checkpoint_callback = keras.callbacks.ModelCheckpoint(filepath = checkpoint_prefix,save_weights_only = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "ee0599cf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.4082\n",
      "Epoch 2/100\n",
      "172/172 [==============================] - 10s 52ms/step - loss: 0.4061\n",
      "Epoch 3/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.4057\n",
      "Epoch 4/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.4055\n",
      "Epoch 5/100\n",
      "172/172 [==============================] - 9s 45ms/step - loss: 0.4052\n",
      "Epoch 6/100\n",
      "172/172 [==============================] - 9s 45ms/step - loss: 0.4036\n",
      "Epoch 7/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.4031\n",
      "Epoch 8/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.4041\n",
      "Epoch 9/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.4013\n",
      "Epoch 10/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.4015\n",
      "Epoch 11/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.4002\n",
      "Epoch 12/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3983\n",
      "Epoch 13/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3962\n",
      "Epoch 14/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3954\n",
      "Epoch 15/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3928\n",
      "Epoch 16/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3951\n",
      "Epoch 17/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3940\n",
      "Epoch 18/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3936\n",
      "Epoch 19/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3936\n",
      "Epoch 20/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3981\n",
      "Epoch 21/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3970\n",
      "Epoch 22/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3986\n",
      "Epoch 23/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3941\n",
      "Epoch 24/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3935\n",
      "Epoch 25/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3920\n",
      "Epoch 26/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3928\n",
      "Epoch 27/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3936\n",
      "Epoch 28/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3925\n",
      "Epoch 29/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3918\n",
      "Epoch 30/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3915\n",
      "Epoch 31/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3950\n",
      "Epoch 32/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3936\n",
      "Epoch 33/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3920\n",
      "Epoch 34/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3906\n",
      "Epoch 35/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3919\n",
      "Epoch 36/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3895\n",
      "Epoch 37/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3908\n",
      "Epoch 38/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3891\n",
      "Epoch 39/100\n",
      "172/172 [==============================] - 9s 46ms/step - loss: 0.3894\n",
      "Epoch 40/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3929\n",
      "Epoch 41/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3906\n",
      "Epoch 42/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3908\n",
      "Epoch 43/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3889\n",
      "Epoch 44/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3892\n",
      "Epoch 45/100\n",
      "172/172 [==============================] - 9s 47ms/step - loss: 0.3904\n",
      "Epoch 46/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3879\n",
      "Epoch 47/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3914\n",
      "Epoch 48/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3931\n",
      "Epoch 49/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3904\n",
      "Epoch 50/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3947\n",
      "Epoch 51/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3963\n",
      "Epoch 52/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3906\n",
      "Epoch 53/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3919\n",
      "Epoch 54/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3913\n",
      "Epoch 55/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.3887\n",
      "Epoch 56/100\n",
      "172/172 [==============================] - 9s 48ms/step - loss: 0.3910\n",
      "Epoch 57/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3914\n",
      "Epoch 58/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3924\n",
      "Epoch 59/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3957\n",
      "Epoch 60/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3945\n",
      "Epoch 61/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3931\n",
      "Epoch 62/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3941\n",
      "Epoch 63/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3962\n",
      "Epoch 64/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.3959\n",
      "Epoch 65/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.3964\n",
      "Epoch 66/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3965\n",
      "Epoch 67/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3946\n",
      "Epoch 68/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3970\n",
      "Epoch 69/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3971\n",
      "Epoch 70/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3963\n",
      "Epoch 71/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.3976\n",
      "Epoch 72/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.3978\n",
      "Epoch 73/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.3979\n",
      "Epoch 74/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4021\n",
      "Epoch 75/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4043\n",
      "Epoch 76/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4022\n",
      "Epoch 77/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4053\n",
      "Epoch 78/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4050\n",
      "Epoch 79/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4036\n",
      "Epoch 80/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4015\n",
      "Epoch 81/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4025\n",
      "Epoch 82/100\n",
      "172/172 [==============================] - 9s 49ms/step - loss: 0.4047\n",
      "Epoch 83/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4042\n",
      "Epoch 84/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4042\n",
      "Epoch 85/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4061\n",
      "Epoch 86/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4065\n",
      "Epoch 87/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4089\n",
      "Epoch 88/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4119\n",
      "Epoch 89/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4150\n",
      "Epoch 90/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4204\n",
      "Epoch 91/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4168\n",
      "Epoch 92/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4172\n",
      "Epoch 93/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4168\n",
      "Epoch 94/100\n",
      "172/172 [==============================] - 10s 51ms/step - loss: 0.4158\n",
      "Epoch 95/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4173\n",
      "Epoch 96/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4130\n",
      "Epoch 97/100\n",
      "172/172 [==============================] - 10s 52ms/step - loss: 0.4120\n",
      "Epoch 98/100\n",
      "172/172 [==============================] - 10s 52ms/step - loss: 0.4147\n",
      "Epoch 99/100\n",
      "172/172 [==============================] - 9s 51ms/step - loss: 0.4198\n",
      "Epoch 100/100\n",
      "172/172 [==============================] - 9s 50ms/step - loss: 0.4270\n"
     ]
    }
   ],
   "source": [
    "# 五,训练模型\n",
    "epochs = 100\n",
    "history = model.fit(seq_dataset, epochs = epochs,callbacks = [checkpoint_callback])\n",
    "\n",
    "# 会自动找到最近保存的变量文件\n",
    "new_checkpoint = tf.train.latest_checkpoint(output_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "805747b3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, None)\n",
      "(1, None, 256)\n",
      "(1, None, 1024)\n",
      "(1, None, 65)\n",
      "Model: \"model_1\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " input_2 (InputLayer)        [(1, None)]               0         \n",
      "                                                                 \n",
      " embedding_1 (Embedding)     (1, None, 256)            16640     \n",
      "                                                                 \n",
      " lstm_1 (LSTM)               (1, None, 1024)           5246976   \n",
      "                                                                 \n",
      " dense_1 (Dense)             (1, None, 65)             66625     \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 5,330,241\n",
      "Trainable params: 5,330,241\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# 六,定义预测模型\n",
    "# 1,使用checkpoint加载模型\n",
    "inputs = keras.Input(batch_input_shape=(1,None))\n",
    "print(inputs.shape)\n",
    "outputs = keras.layers.Embedding(vocab_size, embedding_dim)(inputs)\n",
    "print(outputs.shape)\n",
    "outputs = keras.layers.LSTM(units = rnn_units,stateful = True,recurrent_initializer='glorot_uniform',\n",
    "                        return_sequences = True)(outputs)\n",
    "print(outputs.shape)\n",
    "outputs = keras.layers.Dense(vocab_size)(outputs)\n",
    "print(outputs.shape)\n",
    "\n",
    "model2 = Model(inputs, outputs)\n",
    "model2.load_weights(tf.train.latest_checkpoint(output_dir))\n",
    "model2.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b6e201d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.使用h5文件加载模型\n",
    "from keras.models import load_model\n",
    "model2 = load_model('model.h5', compile=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "d99e6d71",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All kiss m\n"
     ]
    }
   ],
   "source": [
    "# 七,预测模型做预测\n",
    "def generate_text(model, start_string, num_generate = 1000):\n",
    "    input_eval = [char2idx[ch] for ch in start_string]\n",
    "    input_eval = tf.expand_dims(input_eval, 0)\n",
    "    \n",
    "    text_generated = []\n",
    "    model.reset_states()\n",
    "    \n",
    "    for _ in range(num_generate):\n",
    "        # 输入进入模型，得到预测\n",
    "        predictions = model(input_eval)\n",
    "        # 去掉一维的结构\n",
    "        predictions = tf.squeeze(predictions, 0)\n",
    "        # 根据此位置的词库中不同字符的可能性进行采样，获得预测出的字符\n",
    "        predicted_id = tf.random.categorical(predictions, num_samples = 1)[-1, 0].numpy()\n",
    "        # 将预测出的字符最后一位添加到已经生成的字符串末尾\n",
    "        text_generated.append(idx2char[predicted_id])\n",
    "        # 将预测出的字符最后一位作为输入再次输入到模型中预测\n",
    "        input_eval = tf.expand_dims([predicted_id], 0)\n",
    "    return start_string + ''.join(text_generated)\n",
    "\n",
    "new_text = generate_text(model2, \"Al\",8)\n",
    "print(new_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "e6a63d0f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n"
     ]
    }
   ],
   "source": [
    "# 将模型保存为文件，节省空间\n",
    "# model2.save(\"model.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d617fa97",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "TensorGPU",
   "language": "python",
   "name": "tensorgpu"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}