Skip to content

Commit

Permalink
update vislog to be a bit more fancy, has GPT-2/3 perf for hellaswag
Browse files Browse the repository at this point in the history
  • Loading branch information
karpathy committed Jun 19, 2024
1 parent 3bce68b commit 0bc8753
Showing 1 changed file with 45 additions and 22 deletions.
67 changes: 45 additions & 22 deletions dev/vislog.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -61,63 +61,86 @@
"metadata": {},
"outputs": [],
"source": [
"sz = \"350M\"\n",
"import numpy as np\n",
"\n",
"sz = \"124M\"\n",
"loss_baseline = {\n",
" \"124M\": 3.424958,\n",
" \"350M\": 3.083089,\n",
" \"774M\": 3.000580,\n",
" \"1558M\": 2.831273,\n",
"}[sz]\n",
"hella_baseline = {\n",
"hella2_baseline = { # for GPT-2\n",
" \"124M\": 0.294463,\n",
" \"350M\": 0.375224,\n",
" \"774M\": 0.431986,\n",
" \"1558M\": 0.488946,\n",
"}[sz]\n",
"\n",
"hella3_baseline = { # for GPT-3\n",
" \"124M\": 0.337,\n",
" \"350M\": 0.436,\n",
" \"774M\": 0.510,\n",
" \"1558M\": 0.547,\n",
"}[sz]\n",
"# assumes each model run is stored in this way\n",
"logfile = f\"../log{sz}/main.log\"\n",
"logfile = f\"../log_gpt2_{sz}/main.log\"\n",
"streams = parse_logfile(logfile)\n",
"\n",
"# optional function that smooths out the loss some\n",
"def smooth_moving_average(signal, window_size):\n",
"    \"\"\"Smooth a 1D signal with a moving average of width window_size.\n",
"\n",
"    The signal is edge-padded (first/last value repeated) before averaging,\n",
"    so the result has the same length as the input. The window is centered\n",
"    on each sample when window_size is odd.\n",
"\n",
"    Args:\n",
"        signal: 1D array-like of numbers (np.ndarray, list or tuple).\n",
"        window_size: number of samples averaged per output point.\n",
"\n",
"    Returns:\n",
"        The signal itself (as an array) when window_size < 3, otherwise a new\n",
"        array of the same length holding the averaged values.\n",
"\n",
"    Raises:\n",
"        ValueError: if the signal is not 1D or is shorter than window_size.\n",
"    \"\"\"\n",
"    # accept plain lists/tuples too; an ndarray passes through without a copy\n",
"    signal = np.asarray(signal)\n",
"    if signal.ndim != 1:\n",
"        raise ValueError(\"smooth_moving_average only accepts 1D arrays.\")\n",
"    if signal.size < window_size:\n",
"        raise ValueError(\"Input vector needs to be bigger than window size.\")\n",
"    if window_size < 3:\n",
"        # windows this small do no meaningful smoothing; return the input as-is\n",
"        return signal\n",
"\n",
"    # pad window_size-1 samples in total so mode='valid' yields len(signal) outputs\n",
"    pad_left = window_size // 2\n",
"    pad_right = window_size - 1 - pad_left\n",
"    padded = np.pad(signal, (pad_left, pad_right), mode='edge')\n",
"    kernel = np.ones(window_size) / window_size\n",
"    return np.convolve(padded, kernel, mode='valid')\n",
"\n",
"plt.figure(figsize=(16, 6))\n",
"\n",
"# Panel 1: losses: both train and val\n",
"plt.subplot(121)\n",
"xs, ys = streams[\"trl\"] # training loss\n",
"ys = np.array(ys)\n",
"# smooth out ys using a rolling window\n",
"# ys = smooth_moving_average(ys, 21) # optional\n",
"plt.plot(xs, ys, label=f'llm.c ({sz}) train loss')\n",
"print(\"Min Train Loss:\", min(ys))\n",
"xs, ys = streams[\"tel\"] # validation loss\n",
"plt.plot(xs, ys, label=f'llm.c ({sz}) val loss')\n",
"# horizontal line at GPT-2 baseline\n",
"# we don't have GPT-3 loss on this dataset because the weights were never released\n",
"if loss_baseline is not None:\n",
" plt.axhline(y=loss_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint val loss\")\n",
"plt.xlabel(\"steps\")\n",
"plt.ylabel(\"loss\")\n",
"plt.yscale('log')\n",
"plt.ylim(top=4.0)\n",
"plt.legend()\n",
"plt.title(\"Loss\")\n",
"print(\"Min Validation Loss:\", min(ys))\n",
"\n",
"# Panel 2: HellaSwag eval\n",
"plt.subplot(122)\n",
"xs, ys = streams[\"eval\"] # HellaSwag eval\n",
"plt.plot(xs, ys, label=f\"llm.c ({sz})\")\n",
"# horizontal line at GPT-2 baseline\n",
"if hella_baseline:\n",
" plt.axhline(y=hella_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint\")\n",
"plt.xlabel(\"steps\")\n",
"plt.ylabel(\"accuracy\")\n",
"plt.legend()\n",
"plt.title(\"HellaSwag eval\")\n",
"print(\"Max Hellaswag eval:\", max(ys))"
"if \"eval\" in streams:\n",
" xs, ys = streams[\"eval\"] # HellaSwag eval\n",
" ys = np.array(ys)\n",
" plt.plot(xs, ys, label=f\"llm.c ({sz})\")\n",
" # horizontal line at GPT-2/3 baselines\n",
" if hella2_baseline:\n",
" plt.axhline(y=hella2_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint\")\n",
" if hella3_baseline:\n",
" plt.axhline(y=hella3_baseline, color='g', linestyle='--', label=f\"OpenAI GPT-3 ({sz}) checkpoint\")\n",
" plt.xlabel(\"steps\")\n",
" plt.ylabel(\"accuracy\")\n",
" plt.legend()\n",
" plt.title(\"HellaSwag eval\")\n",
" print(\"Max Hellaswag eval:\", max(ys))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 0bc8753

Please sign in to comment.