UFLDL Tutorial (6): Stacked Autoencoders

素颜马尾好姑娘i · 2021-07-16

Step 0: Initialize parameters and constants

Step 1: Train the first sparse autoencoder on the raw training set

Step 2: Train the second sparse autoencoder on the features extracted by the first

Step 3: Train a softmax regression model on the features extracted by the second autoencoder

Step 4: Fine-tune the whole network with error backpropagation

Step 5: Evaluate the resulting classifier on the test set

The key code from the implementation is posted below; comments and corrections are welcome!

stackedAEExercise.m

    clc
    clear
    close all
    addpath ../common/
    addpath ../common/minFunc
    %%======================================================================
    %% STEP 0: Set the parameters of the stacked autoencoder
    % Input/output structure of the whole network
    inputSize = 28 * 28;
    numClasses = 10;
    % Sparse autoencoder structure
    hiddenSizeL1 = 200;    % Layer 1 Hidden Size
    hiddenSizeL2 = 200;    % Layer 2 Hidden Size
    % Weighting terms
    sparsityParam = 0.1;   % desired average activation of the hidden units (rho in the lecture notes)
    beta = 3;              % weight of the sparsity penalty term
    lambda = 3e-3;         % weight decay parameter
    %%======================================================================
    %% STEP 1: Load the MNIST images and labels
    addpath mnist/
    trainData = loadMNISTImages('mnist/train-images-idx3-ubyte');
    trainLabels = loadMNISTLabels('mnist/train-labels-idx1-ubyte');
    trainLabels(trainLabels == 0) = 10; % Remap 0 to 10 since our labels need to start from 1
    %%======================================================================
    %% STEP 2: Train the first sparse autoencoder (on trainData, treated as unlabeled data)
    % Randomly initialize the parameters
    sae1Theta = initializeParameters(hiddenSizeL1, inputSize);
    % Learn the autoencoder parameters from the unlabeled data; the result
    % is returned in the vector sae1OptTheta.
    % Optimizer settings
    options.Method = 'lbfgs';
    options.maxIter = 400;  % Maximum number of iterations of L-BFGS to run
    options.display = 'on';
    % Run the optimizer to obtain sae1OptTheta
    [sae1OptTheta, ~] = minFunc( @(p) sparseAutoencoderCost(p, ...
        inputSize, hiddenSizeL1, ...   % input size, hidden size
        lambda, sparsityParam, ...
        beta, trainData), ...
        sae1Theta, options);
    % save('sae1OptTheta.mat','sae1OptTheta')
    % % Visualize weights
    % W11 = reshape(sae1OptTheta(1:hiddenSizeL1 * inputSize), hiddenSizeL1, inputSize);
    % display_network(W11');
    % load('sae1OptTheta.mat');
    %%======================================================================
    %% STEP 3: Train the second sparse autoencoder (on the features extracted by the first)
    % Compute the first autoencoder's output sae1Features (dimension hiddenSizeL1)
    [sae1Features] = feedForwardAutoencoder(sae1OptTheta, hiddenSizeL1, ...
        inputSize, trainData);
    % Randomly initialize the parameters
    sae2Theta = initializeParameters(hiddenSizeL2, hiddenSizeL1);
    % Train the second autoencoder: input dimension hiddenSizeL1, hidden
    % dimension hiddenSizeL2; the optimized parameters go into sae2OptTheta.
    [sae2OptTheta, ~] = minFunc( @(p) sparseAutoencoderCost(p, ...
        hiddenSizeL1, hiddenSizeL2, ...   % input size, hidden size
        lambda, sparsityParam, ...
        beta, sae1Features), ...
        sae2Theta, options);
    % save('sae2OptTheta.mat','sae2OptTheta')
    % % Visualize weights
    % % W21 = reshape(sae2OptTheta(1:hiddenSizeL2 * hiddenSizeL1), hiddenSizeL2, hiddenSizeL1);
    % % display_network(W21'); % second-layer weights cannot be visualized meaningfully
    % load('sae2OptTheta.mat');
    %%======================================================================
    %% STEP 4: Train the softmax classifier (its input is sae2Features, the features from the second autoencoder)
    % Compute the second autoencoder's output sae2Features (dimension hiddenSizeL2)
    [sae2Features] = feedForwardAutoencoder(sae2OptTheta, hiddenSizeL2, ...
        hiddenSizeL1, sae1Features);
    % Randomly initialize the parameters
    saeSoftmaxTheta = 0.005 * randn(hiddenSizeL2 * numClasses, 1);
    % Optimize the softmax classifier
    options.maxIter = 100;
    softmaxModel = softmaxTrain(size(sae2Features,1), numClasses, lambda, ...
        sae2Features, trainLabels, options);
    saeSoftmaxOptTheta = softmaxModel.optTheta(:);
    % load('saeSoftmaxOptTheta.mat')
    %%======================================================================
    %% STEP 5: Fine-tune the stacked autoencoder
    % Use the parameters learned by the sparse autoencoders (stack) and the
    % softmax classifier (saeSoftmaxOptTheta) to initialize the fine-tuned model.
    % Sparse autoencoder parameters (the "stack")
    stack = cell(2,1);  % cell array holding the autoencoder parameters
    stack{1}.w = reshape(sae1OptTheta(1:hiddenSizeL1*inputSize), ...
        hiddenSizeL1, inputSize);
    stack{1}.b = sae1OptTheta(2*hiddenSizeL1*inputSize+1:2*hiddenSizeL1*inputSize+hiddenSizeL1);
    stack{2}.w = reshape(sae2OptTheta(1:hiddenSizeL2*hiddenSizeL1), ...
        hiddenSizeL2, hiddenSizeL1);
    stack{2}.b = sae2OptTheta(2*hiddenSizeL2*hiddenSizeL1+1:2*hiddenSizeL2*hiddenSizeL1+hiddenSizeL2);
    % Flatten the stack into a vector and record the network configuration
    [stackparams, netconfig] = stack2params(stack);
    % Full model parameters (saeSoftmaxOptTheta + stack)
    stackedAETheta = [ saeSoftmaxOptTheta ; stackparams ];
    % Optionally run a gradient check
    DEBUG = 1;
    if DEBUG
        checkStackedAECost()
    end
    % Fine-tune: use minFunc to minimize the stacked autoencoder cost
    [stackedAEOptTheta, cost] = minFunc( @(p) stackedAECost(p, ...
        inputSize, hiddenSizeL2, ...   % input dimension, last hidden-layer dimension
        numClasses, netconfig, ...     % number of classes, stack configuration
        lambda, trainData, trainLabels), ...
        stackedAETheta, options);
    %%======================================================================
    %% STEP 6: Test
    % Load the labeled test set
    testData = loadMNISTImages('mnist/t10k-images-idx3-ubyte');
    testLabels = loadMNISTLabels('mnist/t10k-labels-idx1-ubyte');
    testLabels(testLabels == 0) = 10; % Remap 0 to 10
    % Predict with the fine-tuned model
    [pred] = stackedAEPredict(stackedAEOptTheta, inputSize, hiddenSizeL2, ...
        numClasses, netconfig, testData);
    acc = mean(testLabels(:) == pred(:)); % prediction accuracy
    fprintf('After Finetuning Test Accuracy: %0.3f%%\n', acc * 100);
    % Predict with the model before fine-tuning
    [pred] = stackedAEPredict(stackedAETheta, inputSize, hiddenSizeL2, ...
        numClasses, netconfig, testData);
    acc = mean(testLabels(:) == pred(:)); % prediction accuracy
    fprintf('Before Finetuning Test Accuracy: %0.3f%%\n', acc * 100);
    % Accuracy is the proportion of correctly classified images.
    % The results for our implementation were:
    %   Before Finetuning Test Accuracy: 87.7%
    %   After Finetuning Test Accuracy: 97.6%
    %
    % If your values are too low (accuracy less than 95%), you should check
    % your code for errors, and make sure you are training on the
    % entire data set of 60000 28x28 training images
    % (unless you modified the loading code, this should be the case)
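
The script calls `feedForwardAutoencoder` to compute the hidden-layer activations that feed each following stage, but that file is not listed here. A minimal sketch, assuming the standard UFLDL parameter layout `theta = [W1(:); W2(:); b1; b2]` (the same layout the STEP 5 unrolling code above relies on):

    % feedForwardAutoencoder.m (sketch): compute the hidden activations of a
    % trained sparse autoencoder, to be used as features for the next stage.
    function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data)
    % W1 and b1 occupy fixed slices of theta under the UFLDL layout
    W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
    b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize);
    % hidden-layer activations, one column per example
    activation = sigmoid(bsxfun(@plus, W1*data, b1));
    end

    function sigm = sigmoid(x)
    sigm = 1 ./ (1 + exp(-x));
    end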

stackedAEPredict.m

    % stackedAEPredict: Takes a trained theta and a test data set,
    % and returns the predicted labels for each example.
    % theta: trained weights from the autoencoder
    % visibleSize: the number of input units
    % hiddenSize: the number of hidden units *at the 2nd layer*
    % numClasses: the number of categories
    % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example.
    % Your code should produce the prediction matrix
    % pred, where pred(i) is argmax_c P(y(c) | x(i)).
    function [pred] = stackedAEPredict(theta, inputSize, hiddenSize, numClasses, netconfig, data)
    %% Unroll theta parameter
    % We first extract the part which computes the softmax gradient
    softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize);
    % Extract out the "stack"
    stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig);
    %% ---------- YOUR CODE HERE --------------------------------------
    % Instructions: Compute pred using theta assuming that the labels start from 1.
    %% Forward propagation through the stack
    a{1} = data;
    depth = numel(netconfig.layersizes);
    for i = 1:depth
        a{i+1} = sigmoid(bsxfun(@plus, stack{i}.w*a{i}, stack{i}.b));
    end
    %% Softmax output Htheta
    softmaxData = a{depth+1};  % the softmax input is the output of the last stack layer
    M = softmaxTheta * softmaxData;
    M = bsxfun(@minus, M, max(M));  % subtract the column maxima to avoid overflow
    Htheta = bsxfun(@rdivide, exp(M), sum(exp(M)));  % softmax hypothesis output
    %% Each sample's predicted class is the row index of the largest entry in its column of Htheta
    [~, pred] = max(Htheta);
    end
    % You might find this useful
    function sigm = sigmoid(x)
    sigm = 1 ./ (1 + exp(-x));
    end
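
The `bsxfun(@minus, M, max(M))` line deserves a note: the softmax output is unchanged when a constant is subtracted from every entry of a column, and subtracting the column maximum keeps `exp` inside a safe range. A quick illustration of why this matters (values here are just an example):

    % Softmax is shift-invariant, so subtracting max(M) changes nothing
    % mathematically but prevents overflow in exp().
    M = [1000; 1001; 999];
    naive  = exp(M) ./ sum(exp(M));                   % NaN: exp(1000) overflows to Inf
    stable = exp(M - max(M)) ./ sum(exp(M - max(M))); % [0.2447; 0.6652; 0.0900]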

stackedAECost.m

    %{
    Takes a trained softmaxTheta and a training data set with labels,
    and returns cost and gradient using a stacked autoencoder model. Used for finetuning.
    Inputs:
    theta: parameter vector of the whole network
    visibleSize: the number of input units
    hiddenSize: the hidden-layer dimension of the last sparse autoencoder
    numClasses: the number of categories
    netconfig: the network configuration of the stack
    lambda: the weight regularization penalty
    data: training set; data(:,i) is the i-th training example
    labels: training labels, where labels(i) is the label for the i-th training example
    Outputs:
    cost: value of the cost function
    grad: gradient vector
    %}
    function [ cost, grad ] = stackedAECost(theta, ...
        inputSize, hiddenSize, ...   % input dimension, last hidden-layer dimension
        numClasses, netconfig, ...   % number of classes, stack configuration
        lambda, data, labels)
    %% Unpack the softmax and sparse-autoencoder parameters from theta
    softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize); % softmax parameter matrix
    stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig); % extract out the "stack"
    %% Initialization
    % number of samples
    numCases = size(data, 2);
    % ground-truth label matrix (the indicator matrix I)
    groundTruth = full(sparse(labels, 1:numCases, 1));
    % gradient of the softmax classifier
    softmaxThetaGrad = zeros(size(softmaxTheta));
    % gradients of the sparse autoencoders (weights w and biases b)
    stackgrad = cell(size(stack));
    for d = 1:numel(stack)
        stackgrad{d}.w = zeros(size(stack{d}.w));
        stackgrad{d}.b = zeros(size(stack{d}.b));
    end
    %% Forward propagation
    % Initialization
    depth = numel(stack);   % number of autoencoder layers in the stack
    z = cell(depth+1, 1);   % weighted inputs of each layer
    a = cell(depth+1, 1);   % activations of each layer
    a{1} = data;            % input-layer data
    % autoencoder outputs a{2}, ..., a{depth+1}
    for i = 1:depth
        % features extracted by each autoencoder layer
        z{i+1} = bsxfun(@plus, stack{i}.w*a{i}, stack{i}.b);
        a{i+1} = sigmoid(z{i+1});
    end
    % softmax classifier output Htheta
    softmaxData = a{depth+1};  % the softmax input is the output of the last stack layer
    M = softmaxTheta * softmaxData;
    M = bsxfun(@minus, M, max(M));  % subtract the column maxima to avoid overflow
    Htheta = bsxfun(@rdivide, exp(M), sum(exp(M)));  % softmax hypothesis output
    %% Cost of the whole network (softmax cross-entropy plus weight decay on softmaxTheta)
    cost = -sum(sum(groundTruth.*log(Htheta)))/numCases + lambda*sum(softmaxTheta(:).^2)/2;
    %% Gradient computation
    % softmax-layer gradient
    softmaxThetaGrad = -(groundTruth - Htheta)*softmaxData'/numCases + lambda*softmaxTheta;
    % sparse autoencoder layers
    % error terms (deltas)
    delta = cell(depth+1, 1);
    delta{depth+1} = -softmaxTheta'*(groundTruth - Htheta) .* a{depth+1} .* (1 - a{depth+1});
    for i = depth:-1:2
        delta{i} = stack{i}.w'*delta{i+1} .* a{i} .* (1 - a{i});
    end
    % gradients
    for i = depth:-1:1
        stackgrad{i}.w = delta{i+1}*a{i}'/numCases;
        stackgrad{i}.b = sum(delta{i+1}, 2)/numCases;  % column vector, one entry per hidden unit
    end
    %% Roll gradient vector
    grad = [softmaxThetaGrad(:) ; stack2params(stackgrad)];
    end
    % You might find this useful
    function sigm = sigmoid(x)
    sigm = 1 ./ (1 + exp(-x));
    end
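
The `checkStackedAECost()` call in the main script compares this analytic gradient against a numerical estimate on a small random network. If you don't have the starter-code version handy, a minimal central-difference checker along the same lines looks like this (a sketch; the name `computeNumericalGradient` follows the UFLDL convention):

    % Numerical gradient by central differences: for each component i,
    % perturb theta by +/- epsilon and difference the resulting cost values.
    function numgrad = computeNumericalGradient(J, theta)
    epsilon = 1e-4;
    numgrad = zeros(size(theta));
    for i = 1:numel(theta)
        e = zeros(size(theta));
        e(i) = epsilon;
        numgrad(i) = (J(theta + e) - J(theta - e)) / (2 * epsilon);
    end
    end

Comparing `norm(numgrad - grad) / norm(numgrad + grad)` against a tolerance on the order of 1e-9 on a tiny network is usually sufficient; running the check on the full 784-200-200 model would be far too slow, since each parameter costs two full forward passes.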
