UP | HOME

Table of Contents

1. Numerical Stability and Initialization

1.1. Vanishing and Exploding Gradients

1.1.1. Vanishing Gradients

(ns clj-d2l.numerical-stability
  (:require
   [clojure.spec.alpha :as s]
   [clj-djl.ndarray :as nd]
   [clj-djl.training :as t]
   [clj-djl.training.dataset :as ds]
   [clj-djl.training.loss :as loss]
   [clj-djl.training.optimizer :as optimizer]
   [clj-djl.training.tracker :as tracker]
   [clj-djl.training.listener :as listener]
   [clj-djl.model :as m]
   [clj-djl.nn :as nn]
   [clj-djl.device :as dev]
   [clj-d2l.core :as d2l]))
;; NDArray manager that owns all arrays allocated in this chapter.
(def ndm (nd/base-manager))

;; Input points on [-8, 8) with step 0.1; `doto` returns the array itself
;; after marking it as requiring gradient tracking.
(def x
  (doto (nd/arange ndm -8.0 8.0 0.1)
    (t/attach-gradient)))
;; Record sigmoid(x) and its gradient, then plot both curves to an SVG.
;; `with-open` ensures the gradient collector is released afterwards.
(with-open [collector (t/gradient-collector)]
  (let [y (nn/sigmoid x)
        _ (t/backward collector y)
        grad (t/get-gradient x)]
    (d2l/plot-lines "figure/numerical_stability_1.svg"
                    ["sigmoid" "gradient"]
                    (nd/to-vec x)
                    [(nd/to-vec y) (nd/to-vec grad)])))

numerical_stability_1.svg

(def M (nd/random-normal ndm [4 4]))
M
ND: (4, 4) cpu() float32
[[ 2.2122,  1.1631,  0.774 ,  0.4838],
 [ 1.0434,  0.2996,  1.1839,  0.153 ],
 [ 1.8917, -1.1688, -1.2347,  1.5581],
 [-1.771 , -0.5459, -0.4514, -2.3556],
]
(reduce nd/dot M (repeatedly 100 #(nd/random-normal ndm [4 4])))
ND: (4, 4) cpu() float32
[[ 2.23112318e+24, -4.42056910e+23,  6.86103472e+24,  6.96760271e+24],
 [-7.82124006e+23,  1.54969296e+23, -2.40508260e+24, -2.44240483e+24],
 [ 5.04485362e+24, -9.99554626e+23,  1.55135976e+25,  1.57545294e+25],
 [-9.53739183e+23,  1.88953602e+23, -2.93306315e+24, -2.97871740e+24],
]

Author: Kimi Ma

Created: 2022-05-17 Tue 08:06