# Data augmentation
from matplotlib import pyplot as plt
from mxnet import image
from mxnet.gluon import data as gdata, utils

utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/cat.jpg')
img = image.imread('cat.jpg')
plt.imshow(img.asnumpy())
plt.show()

def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j].asnumpy())
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes

def apply(img, aug, num_rows=2, num_cols=4, scale=3):
    # Apply the augmentation aug several times and display all results.
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)
    plt.show()

# Flipping and cropping
# Random horizontal (left-right) flip
apply(img, gdata.vision.transforms.RandomFlipLeftRight())
# Random vertical (top-bottom) flip
apply(img, gdata.vision.transforms.RandomFlipTopBottom())
# Randomly crop 10%-100% of the area with aspect ratio between 0.5 and 2, then resize to 200x200
shape_aug = gdata.vision.transforms.RandomResizedCrop((200, 200), scale=(0.1, 1), ratio=(0.5, 2))
apply(img, shape_aug)
# Random brightness
apply(img, gdata.vision.transforms.RandomBrightness(0.5))
# Random hue
apply(img, gdata.vision.transforms.RandomHue(0.5))
# Jitter brightness, contrast, saturation, and hue together
color_aug = gdata.vision.transforms.RandomColorJitter(
    brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)
apply(img, color_aug)

# Composing multiple augmentations
augs = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomFlipLeftRight(), color_aug, shape_aug])
apply(img, augs)
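# A hedged sketch of how such a pipeline is typically attached to a dataset
# (assuming the built-in CIFAR-10 helper, which downloads the data on first
# use): transform_first applies the augmentations to the images only and
# leaves the labels untouched.
cifar_train = gdata.vision.CIFAR10(train=True).transform_first(augs)
train_iter = gdata.DataLoader(cifar_train, batch_size=256, shuffle=True)
for X, batch_labels in train_iter:
    print(X.shape, batch_labels.shape)  # e.g. (256, 200, 200, 3) and (256,)
    break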


# Data augmentation with Gluon's mxnet.gluon.data.Dataset
import mxnet as mx # used version '1.0.0' at time of writing
import numpy as np
from matplotlib.pyplot import imshow
import multiprocessing
import os

mx.random.seed(42) # set seed for repeatability
def plot_mx_array(array):
    """
    Array expected to be height x width x 3 (channels), and values are floats between 0 and 255.
    """
    assert array.shape[2] == 3, "RGB Channel should be last"
    imshow((array.clip(0, 255)/255).asnumpy())
    plt.show()
image_folder = os.path.join('data','images')
mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/data_aug/inputs/0.jpg', dirname=image_folder)
example_image = mx.image.imread(os.path.join(image_folder, "0.jpg")).astype("float32")
plot_mx_array(example_image)

def aug_transform(data, label):
    data = data.astype('float32')/255
    augs = mx.image.CreateAugmenter(data_shape=(3, 300, 300),
                                    rand_crop=0.5, rand_mirror=True, inter_method=10,
                                    brightness=0.125, contrast=0.125, saturation=0.125,
                                    pca_noise=0.02)
    for aug in augs:
        data = aug(data)
    return data, label


training_dataset = mx.gluon.data.vision.ImageFolderDataset('data', transform=aug_transform)

sample = training_dataset[0]
sample_data = sample[0]
plot_mx_array(sample_data*255)
# Load the dataset with a DataLoader
batch_size = 1
training_data_loader = mx.gluon.data.DataLoader(training_dataset, batch_size=batch_size, shuffle=True)

for data_batch, label_batch in training_data_loader:
    plot_mx_array(data_batch[0]*255)
    assert data_batch.shape == (1, 300, 300, 3)
    assert label_batch.shape == (1,)
    break
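# A hedged sketch: multiprocessing (imported above) supplies a CPU count so the
# augmentation work can be spread over worker processes via the DataLoader's
# num_workers argument (available in later MXNet releases; the worker count
# chosen here is an assumption).
parallel_loader = mx.gluon.data.DataLoader(
    training_dataset, batch_size=batch_size, shuffle=True,
    num_workers=multiprocessing.cpu_count())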



# Custom layers
from mxnet import gluon, nd
from mxnet.gluon import nn


class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)

    def forward(self, x):
        return x - x.mean()
layer = CenteredLayer()
layer(nd.array([1, 2, 3, 4, 5]))
print(layer)

# Using the custom layer in a more complex network
net = nn.Sequential()
net.add(nn.Dense(128),
        CenteredLayer())
net.initialize()
print(net)

y = net(nd.random.uniform(shape=(4, 8)))
y.mean().asscalar()


params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
print(params)
class MyDense(nn.Block):

    def __init__(self, units, in_units, **kwargs):
        # units: the number of outputs in this layer
        # in_units: the number of inputs in this layer

        super(MyDense, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)
dense = MyDense(units=3, in_units=5)
dense.params
dense.initialize()
dense(nd.random.uniform(shape=(2, 5)))
print(dense)

net = nn.Sequential()
net.add(MyDense(8, in_units=64),
        MyDense(1, in_units=8))
net.initialize()
net(nd.random.uniform(shape=(2, 64)))
print(net)


# Building layers and blocks
from mxnet import nd
from mxnet.gluon import nn
x = nd.random.uniform(shape=(2, 20))

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(x)

class MLP(nn.Block):
    # Declare a layer with model parameters. Here, we declare two fully
    # connected layers.

    def __init__(self, **kwargs):
        # Call the constructor of the MLP parent class Block to perform the
        # necessary initialization. In this way, other function parameters can
        # also be specified when constructing an instance, such as the model
        # parameter, params, described in the following sections.
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # Hidden layer
        self.output = nn.Dense(10)  # Output layer

    # Define the forward computation of the model, that is, how to return the
    # required model output based on the input x.

    def forward(self, x):
        hidden_out = self.hidden(x)
        return self.output(hidden_out)
net = MLP()
net.initialize()
net(x)


# Re-implementing Sequential
class MySequential(nn.Block):
    def __init__(self, **kwargs):
        super(MySequential, self).__init__(**kwargs)

    def add(self, block):
        # Here, block is an instance of a Block subclass, and we assume it has a unique name. We save it in the
        # member variable _children of the Block class, and its type is OrderedDict. When the MySequential instance
        # calls the initialize function, the system automatically initializes all members of _children.
        self._children[block.name] = block

    def forward(self, x):
        # OrderedDict guarantees that members will be traversed in the order they were added.
        for block in self._children.values():
            x = block(x)
        return x
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(x)


# Constant parameters
class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        # Random weight parameters created with the get_constant are not
        # iterated during training (i.e. constant parameters).
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        x = self.dense(x)
        # Use the constant parameters created, as well as the ReLU and dot
        # functions of NDArray.

        x = nd.relu(nd.dot(x, self.rand_weight.data()) + 1)
        # Re-use the fully connected layer. This is equivalent to sharing
        # parameters with two fully connected layers.
        x = self.dense(x)
        # Here in the control flow, we need to call `asscalar` to return the
        # scalar for comparison.

        while x.norm().asscalar() > 1:
            x /= 2
        if x.norm().asscalar() < 0.8:
            x *= 10
        return x.sum()
net = FancyMLP()
net.initialize()
net(x)


# Composing blocks
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'),
                     nn.Dense(32, activation='relu'))
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        return self.dense(self.net(x))

chimera = nn.Sequential()
chimera.add(NestMLP(), nn.Dense(20), FancyMLP())

chimera.initialize()
chimera(x)


# Data normalization
import mxnet as mx
from mxnet.gluon.data.vision.transforms import Normalize

image_int = mx.nd.random.randint(low=0, high=256, shape=(1,3,2,2))
image_float = image_int.astype('float32')/255
# the following normalization statistics are taken from gluon model zoo
normalizer = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
image = normalizer(image_float)
image
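# A quick check (sketch): Normalize subtracts the per-channel mean and divides
# by the per-channel standard deviation, so channel 0 can be reproduced by hand.
manual = (image_float[0, 0] - 0.485) / 0.229
print((manual - image[0, 0]).abs().max())  # should be ~0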

# Layer normalization
data = mx.nd.arange(start=0, stop=2*4*2).reshape(2, 4, 2)
print(data)
net = mx.gluon.nn.LayerNorm()
net.initialize()
output = net(data)
print(output)
print('beta:', net.beta.data().asnumpy())
print('gamma:', net.gamma.data().asnumpy())
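# A quick check (sketch): LayerNorm normalizes over the last axis, so every
# slice along that axis should end up with roughly zero mean and unit variance.
print(output.mean(axis=-1))
print(((output - output.mean(axis=-1, keepdims=True)) ** 2).mean(axis=-1))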

# Visualizing activation functions
import numpy as np
import mxnet as mx
from matplotlib import pyplot as plt

def visualize_activation(activation_fn):
    data = np.linspace(-10, 10, 501)
    x = mx.nd.array(data)
    x.attach_grad()
    with mx.autograd.record():
        y = activation_fn(x)
    y.backward()

    plt.figure()
    plt.plot(data, y.asnumpy())
    plt.plot(data, x.grad.asnumpy())
    activation = activation_fn.name[:-1]
    plt.legend(["{} activation".format(activation), "{} gradient".format(activation)])
    plt.show()

visualize_activation(mx.gluon.nn.Activation('sigmoid'))
visualize_activation(mx.gluon.nn.Activation('tanh'))
#softsign
visualize_activation(mx.gluon.nn.Activation('softsign'))
visualize_activation(mx.gluon.nn.Activation('relu'))
visualize_activation(mx.gluon.nn.Activation('softrelu'))

visualize_activation(mx.gluon.nn.LeakyReLU(0.05))
prelu = mx.gluon.nn.PReLU(mx.init.Normal(0.05))
prelu.initialize()

visualize_activation(prelu)
visualize_activation(mx.gluon.nn.ELU())
visualize_activation(mx.gluon.nn.SELU())
visualize_activation(mx.gluon.nn.Swish())



# Loss functions
from IPython import display
from matplotlib import pyplot as plt
from mxnet import nd, autograd
from mxnet.gluon import nn, loss as gloss

loss = gloss.L2Loss()
x = nd.ones((2,))
y = nd.ones((2,)) * 2
loss(x, y)
0.5 * (x - y)**2


# Computing the gradient of a loss function
X = nd.random.uniform(shape=(2, 4))
net = nn.Dense(1)
net.initialize()
with autograd.record():
    l = loss(net(X), y)
l
l.backward()
net.weight.grad()

# Visualizing loss functions
def plot(x, y):
    display.set_matplotlib_formats('svg')
    plt.plot(x.asnumpy(), y.asnumpy())
    plt.xlabel('x')
    plt.ylabel('loss')
    plt.show()

def show_regression_loss(loss):
    x = nd.arange(-5, 5, .1)
    y = loss(x, nd.zeros_like(x))
    plot(x, y)

show_regression_loss(gloss.L1Loss())

show_regression_loss(gloss.L2Loss())


def show_classification_loss(loss):
    x = nd.arange(-5, 5, .1)
    y = loss(x, nd.ones_like(x))
    plot(x, y)

show_classification_loss(gloss.LogisticLoss())
show_classification_loss(gloss.HingeLoss())

loss = gloss.SoftmaxCrossEntropyLoss()
x = nd.array([[1, 10], [8, 2]])
y = nd.array([0, 1])
loss(x, y)
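# A quick check (sketch): the same values can be reproduced by picking the
# negative log-softmax probability at each target class index.
print(loss(x, y))
print(-nd.pick(nd.log_softmax(x), y, axis=-1))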


# A custom loss block
import matplotlib.pyplot as plt
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet.gluon.loss import Loss
import random

class ContrastiveLoss(Loss):
    def __init__(self, margin=6., weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, image1, image2, label):
        distances = image1 - image2
        distances_squared = F.sum(F.square(distances), 1, keepdims=True)
        euclidean_distances = F.sqrt(distances_squared + 0.0001)
        d = F.clip(self.margin - euclidean_distances, 0, self.margin)
        loss = (1 - label) * distances_squared + label * F.square(d)
        loss = 0.5*loss
        return loss
loss = ContrastiveLoss(margin=6.0)
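# A quick sanity check (sketch): identical embeddings labelled "same" (0) give
# (near) zero loss, while distinct embeddings labelled "different" (1) are
# penalized whenever their distance is smaller than the margin.
emb_a, emb_b = nd.zeros((1, 2)), nd.ones((1, 2))
print(loss(emb_a, emb_a, nd.array([[0]])))  # ~0
print(loss(emb_a, emb_b, nd.array([[1]])))  # > 0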

#Define the Siamese network
class Siamese(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(Siamese, self).__init__(**kwargs)
        with self.name_scope():
            self.cnn = gluon.nn.HybridSequential()
            with self.cnn.name_scope():
                self.cnn.add(gluon.nn.Conv2D(64, 5, activation='relu'))
                self.cnn.add(gluon.nn.MaxPool2D(2, 2))
                self.cnn.add(gluon.nn.Conv2D(64, 5, activation='relu'))
                self.cnn.add(gluon.nn.MaxPool2D(2, 2))
                self.cnn.add(gluon.nn.Dense(256, activation='relu'))
                self.cnn.add(gluon.nn.Dense(2, activation='softrelu'))

    def hybrid_forward(self, F, input0, input1):
        out0 = self.cnn(input0)
        out1 = self.cnn(input1)
        return out0, out1


class GetImagePairs(mx.gluon.data.vision.ImageFolderDataset):
    def __init__(self, root):
        super(GetImagePairs, self).__init__(root, flag=0)
        self.root = root

    def __getitem__(self, index):
        items_with_index = list(enumerate(self.items))
        image0_index, image0_tuple = random.choice(items_with_index)
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            while True:
                image1_index, image1_tuple = random.choice(items_with_index)
                if image0_tuple[1] == image1_tuple[1]:
                    break
        else:
            image1_index, image1_tuple = random.choice(items_with_index)
        image0 = super().__getitem__(image0_index)
        image1 = super().__getitem__(image1_index)
        label = mx.nd.array([int(image1_tuple[1] != image0_tuple[1])])
        return image0[0], image1[0], label

    def __len__(self):
        return super().__len__()

def transform(img0, img1, label):
    normalized_img0 = nd.transpose(img0.astype('float32'), (2, 0, 1))/255.0
    normalized_img1 = nd.transpose(img1.astype('float32'), (2, 0, 1))/255.0
    return normalized_img0, normalized_img1, label

training_dir = "images_background/Tifinagh"
testing_dir = "images_background/Inuktitut_(Canadian_Aboriginal_Syllabics)"
train = GetImagePairs(training_dir)
test = GetImagePairs(testing_dir)
train_dataloader = gluon.data.DataLoader(train.transform(transform),
                                        shuffle=True, batch_size=16)
test_dataloader = gluon.data.DataLoader(test.transform(transform),
                                        shuffle=False, batch_size=1)

img1, img2, label = test[0]
print("Same: {}".format(int(label.asscalar()) == 0))
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))
ax0.imshow(img1.asnumpy()[:,:,0], cmap='gray')
ax0.axis('off')
ax1.imshow(img2.asnumpy()[:,:,0], cmap='gray')
ax1.axis("off")
plt.show()

# Training the network
model = Siamese()
model.initialize(init=mx.init.Xavier())
trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': 0.001})
loss = ContrastiveLoss(margin=6.0)

for epoch in range(10):
    for i, data in enumerate(train_dataloader):
        image1, image2, label = data
        with autograd.record():
            output1, output2 = model(image1, image2)
            loss_contrastive = loss(output1, output2, label)
        loss_contrastive.backward()
        trainer.step(image1.shape[0])
        loss_mean = loss_contrastive.mean().asscalar()
    print("Epoch number {}\n Current loss {}\n".format(epoch, loss_mean))
# Testing
for i, data in enumerate(test_dataloader):
    img1, img2, label = data
    output1, output2 = model(img1, img2)
    dist_sq = mx.ndarray.sum(mx.ndarray.square(output1 - output2))
    dist = mx.ndarray.sqrt(dist_sq).asscalar()
    print("Euclidean Distance:", dist, "Test label", label[0].asscalar())
    fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))
    ax0.imshow(img1.asnumpy()[0, 0, :, :], cmap='gray')
    ax0.axis('off')
    ax1.imshow(img2.asnumpy()[0, 0, :, :], cmap='gray')
    ax1.axis("off")
    plt.show()



# Parameter management
from mxnet import gluon, init, nd
from mxnet.gluon import nn
 
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
 
x = nd.random.uniform(shape=(2,20))
y = net(x)

print(net[0].params)
print(net[0].collect_params())

print(net.params)
print(net.collect_params())

# Accessing the parameters of a specific layer; the first form below is the usual one
net[0].weight
net[0].params['dense0_weight']

net[0].weight.data()
net[0].weight.grad()

# Declaring a parameter: method 1
params = gluon.ParameterDict()
params.get("param2", shape=(2, 3))
params

# Declaring a parameter: method 2
my_param = gluon.Parameter('exciting_parameter_yay', shape=(3, 3))
my_param.initialize()

# Initializing model parameters
# Re-initializing an already-initialized model requires force_reinit=True;
# this guards against accidentally wiping out existing parameter values.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)


# Custom initialization, method 1: modify the parameter data directly
net[0].weight.set_data(net[0].weight.data()+1)
net[0].weight.data()[0]

# Custom initialization, method 2: subclass init.Initializer
class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data *= data.abs() >= 5
 
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

# Sharing model parameters
from mxnet import nd
from mxnet.gluon import nn
 
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
        shared,
        nn.Dense(8, activation='relu', params=shared.params),
        nn.Dense(10))
net.initialize()
 
x = nd.random.uniform(shape=(2,20))
net(x)
 
net[1].weight.data()[0] == net[2].weight.data()[0]

# Deferred model initialization
from mxnet import init,nd
from mxnet.gluon import nn
class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize(init=MyInit())
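# At this point the parameters are declared but not yet allocated: the input
# dimension of each layer is still unknown. A quick check (the 0 in the printed
# shapes marks the deferred dimension):
print(net.collect_params())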

x = nd.random.uniform(shape=(2, 20))
y = net(x)
# The initializer is only invoked on the first forward pass; running the
# network again does not re-initialize the parameters.
y = net(x)

from mxnet import init, nd
from mxnet.gluon import nn
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()  # Use the default initialization method

x = nd.random.uniform(shape=(2, 20))
net(x)            # Forward computation

print(net[0].params)
print(net[1].params)

print(net[1].bias)
print(net[1].bias.data())

print(net[0].params['dense0_weight'])
print(net[0].params['dense0_weight'].data())


net[0].weight.grad()

# Parameters only for the first layer
print(net[0].collect_params())
# Parameters of the entire network
print(net.collect_params())

net.collect_params()['dense1_bias'].data()
print(net.collect_params('.*weight'))
print(net.collect_params('dense0.*'))


def block1():
    net = nn.Sequential()
    net.add(nn.Dense(32, activation='relu'))
    net.add(nn.Dense(16, activation='relu'))
    return net

def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add(block1())
    return net

rgnet = nn.Sequential()
rgnet.add(block2())
rgnet.add(nn.Dense(10))
rgnet.initialize()
rgnet(x)


print(rgnet.collect_params)
print(rgnet.collect_params())

rgnet[0][1][0].bias.data()
# Saving and loading parameters
rgnet.save_parameters('model.params')
rgnet.load_parameters('model.params')
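# A hedged sketch: save_parameters stores parameters by their position in the
# block hierarchy, so the same file can be loaded into a freshly built copy of
# the architecture, which then reproduces the original outputs.
clone = nn.Sequential()
clone.add(block2())
clone.add(nn.Dense(10))
clone.load_parameters('model.params')
print((clone(x) == rgnet(x)).sum())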

# Parameter initialization
# force_reinit ensures that the variables are initialized again,
# regardless of whether they were already initialized previously
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]

net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]

net[1].initialize(init=init.Constant(42), force_reinit=True)
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
print(net[1].weight.data()[0,0])
print(net[0].weight.data()[0])

# Custom initialization method
class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data *= data.abs() >= 5

net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]


net[0].weight.data()[:] += 1
net[0].weight.data()[0,0] = 42
net[0].weight.data()[0]

# Shared parameters
net = nn.Sequential()
# We need to give the shared layer a name such that we can reference
# its parameters
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
        shared,
        nn.Dense(8, activation='relu', params=shared.params),
        nn.Dense(10))
net.initialize()

x = nd.random.uniform(shape=(2, 20))
net(x)

# Check whether the parameters are the same
print(net[1].weight.data()[0] == net[2].weight.data()[0])
net[1].weight.data()[0,0] = 100
# And make sure that they're actually the same object rather
# than just having the same value
print(net[1].weight.data()[0] == net[2].weight.data()[0])


# Naming layers and blocks
from __future__ import print_function
import mxnet as mx
from mxnet import gluon
mydense = gluon.nn.Dense(100, prefix='mydense_')
print(mydense.prefix)

dense0 = gluon.nn.Dense(100)
print(dense0.prefix)

dense1 = gluon.nn.Dense(100)
print(dense1.prefix)

print(dense0.collect_params())

# Using name scopes
class Model(gluon.Block):
    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(20)
            self.dense1 = gluon.nn.Dense(20)
            self.mydense = gluon.nn.Dense(20, prefix='mydense_')

    def forward(self, x):
        x = mx.nd.relu(self.dense0(x))
        x = mx.nd.relu(self.dense1(x))
        return mx.nd.relu(self.mydense(x))
model0 = Model()
model0.initialize()
model0(mx.nd.zeros((1, 20)))
print(model0.prefix)
print(model0.dense0.prefix)
print(model0.dense1.prefix)
print(model0.mydense.prefix)

model1 = Model()
print(model1.prefix)
print(model1.dense0.prefix)
print(model1.dense1.prefix)
print(model1.mydense.prefix)


net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(20))
    net.add(gluon.nn.Dense(20))
print(net.prefix)
print(net[0].prefix)
print(net[1].prefix)

net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.model_zoo.vision.alexnet(pretrained=True))
    net.add(gluon.model_zoo.vision.alexnet(pretrained=True))
print(net.prefix, net[0].prefix, net[1].prefix)