当前位置：首页 > news >正文

厦门php网站建设/买外链有用吗

news 2025/6/30 1:30:57

厦门php网站建设,买外链有用吗,做宠物的网站有哪些,自己公司做公益网站怎么弄GRU 学习torch官方资料，GRU的讲解公式中的参数一共可以总结为6个全连接层，weight[“weight_ih_l0”] 的维度为[3 x hidden size，embedding size] , weight[“weight_hh_l0”]的维度为[3 x hidden size,hidden size]在pytorch中 WirW_{ir}Wi…

GRU 学习torch官方资料，GRU的讲解
基本公式
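公式中的参数一共可以总结为 6 个全连接层：`weight["weight_ih_l0"]` 的维度为 [3 × hidden_size, embedding_size]，`weight["weight_hh_l0"]` 的维度为 [3 × hidden_size, hidden_size]。在 PyTorch 中，$W_{ir}$、$W_{iz}$、$W_{in}$ 按 r、z、n 的顺序在第 0 维拼接成一个矩阵 `weight_ih_l0`；同理，$W_{hr}$、$W_{hz}$、$W_{hn}$ 也被拼接成 `weight_hh_l0`，偏置 `bias_ih_l0`、`bias_hh_l0` 的维度均为 [3 × hidden_size]。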
$$\text{总参数} = 3 \times \big((\text{embedding\_size} + \text{hidden\_size}) \times \text{hidden\_size} + 2 \times \text{hidden\_size}\big)$$

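（原图缺失）PyTorch 文档中 GRU 的基本公式如下：

$$r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{t-1} + b_{hr})$$
$$z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{t-1} + b_{hz})$$
$$n_t = \tanh\big(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{t-1} + b_{hn})\big)$$
$$h_t = (1 - z_t) \odot n_t + z_t \odot h_{t-1}$$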

单层

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 18 11:11:08 2021

@author: yihuang

Re-implement a single-layer ``nn.GRU`` forward pass by hand, using the
weights stored in the module's ``state_dict``, and print both results so
they can be compared.
"""
import torch
import torch.nn as nn

embedding_size = 10
hidden_size = 15
time_size = 4

rnn = nn.GRU(embedding_size, hidden_size, 1, batch_first=True)
total_params = sum(p.numel() for p in rnn.parameters())
print(total_params)

# Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
inputs = torch.randn(1, time_size, embedding_size)
output, hn = rnn(inputs)
weight = rnn.state_dict()
print(weight["weight_ih_l0"].shape, weight["weight_hh_l0"].shape)


def gruFun(Wih, Bih, Whh, Bhh, x, hid):
    """Compute one GRU time step and return the new hidden state.

    Args:
        Wih: Fused input-to-hidden weights, the r/z/n blocks stacked along
            dim 0 (``weight_ih_l*`` in the ``nn.GRU`` state dict).
        Bih: Fused input-to-hidden bias (``bias_ih_l*``).
        Whh: Fused hidden-to-hidden weights (``weight_hh_l*``).
        Bhh: Fused hidden-to-hidden bias (``bias_hh_l*``).
        x: Input for the current time step; last dim must match ``Wih``'s
            second dim.
        hid: Previous hidden state; last dim must match ``Whh``'s second dim.

    Returns:
        The hidden state for the current time step.
    """
    gate_x = torch.matmul(x, Wih.T) + Bih
    gate_h = torch.matmul(hid, Whh.T) + Bhh
    # The fused projections are laid out as [reset | update | new].
    i_r, i_i, i_n = gate_x.chunk(3, -1)
    h_r, h_i, h_n = gate_h.chunk(3, -1)
    resetgate = torch.sigmoid(i_r + h_r)
    inputgate = torch.sigmoid(i_i + h_i)  # the "update" gate z_t
    # The reset gate scales only the hidden contribution of the candidate.
    newgate = torch.tanh(i_n + (resetgate * h_n))
    # Same as (1 - z) * n + z * h, written with one fewer multiplication.
    hy = newgate + inputgate * (hid - newgate)
    return hy


# The hidden state is (batch, hidden_size); nn.GRU starts from zeros when no
# h0 is given, so the manual loop does the same.
h0 = torch.zeros(inputs.size(0), hidden_size)
out = torch.zeros(inputs.size(0), time_size, hidden_size)
for i in range(inputs.size(1)):
    ht = gruFun(
        weight["weight_ih_l0"], weight["bias_ih_l0"],
        weight["weight_hh_l0"], weight["bias_hh_l0"],
        inputs[:, i, :], h0,
    )
    out[:, i, :] = ht
    h0 = ht

print("output:\n", output)
print("out:\n", out)

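输出结果（注意：下面的示例输出只有 3 个时间步、每步 10 维，来自另一组尺寸设置的运行；按上面代码中的 time_size = 4、hidden_size = 15 运行会得到 [1, 4, 15] 的张量。具体数值因随机初始化而不同，关键是 output 与 out 完全一致。）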

output:tensor([[[ 0.2349,  0.3498, -0.0706, -0.1650,  0.4986,  0.3309,  0.1601,0.5339,  0.0791,  0.6002],[ 0.2641,  0.2583,  0.1358, -0.1170,  0.6451,  0.3311,  0.2608,0.3465,  0.4330,  0.7000],[ 0.3160,  0.3977,  0.1755,  0.0991,  0.7275,  0.0311, -0.0969,0.2958,  0.2427,  0.3704]]], grad_fn=<TransposeBackward1>)
out:tensor([[[ 0.2349,  0.3498, -0.0706, -0.1650,  0.4986,  0.3309,  0.1601,0.5339,  0.0791,  0.6002],[ 0.2641,  0.2583,  0.1358, -0.1170,  0.6451,  0.3311,  0.2608,0.3465,  0.4330,  0.7000],[ 0.3160,  0.3977,  0.1755,  0.0991,  0.7275,  0.0311, -0.0969,0.2958,  0.2427,  0.3704]]])

多层GRU

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 18 11:11:08 2021

@author: yihuang

Re-implement a multi-layer ``nn.GRU`` forward pass by hand, using the
weights stored in the module's ``state_dict``, and print both results so
they can be compared.
"""
import torch
import torch.nn as nn

embedding_size = 10
hidden_size = 10
num_layers = 2
time_size = 2

rnn = nn.GRU(embedding_size, hidden_size, num_layers, batch_first=True)
# Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
inputs = torch.randn(1, time_size, embedding_size)
output, hn = rnn(inputs)
weight = rnn.state_dict()


def gruFun(Wih, Bih, Whh, Bhh, x, hid):
    """Compute one GRU time step and return the new hidden state.

    Args:
        Wih: Fused input-to-hidden weights, the r/z/n blocks stacked along
            dim 0 (``weight_ih_l*`` in the ``nn.GRU`` state dict).
        Bih: Fused input-to-hidden bias (``bias_ih_l*``).
        Whh: Fused hidden-to-hidden weights (``weight_hh_l*``).
        Bhh: Fused hidden-to-hidden bias (``bias_hh_l*``).
        x: Input for the current time step; last dim must match ``Wih``'s
            second dim.
        hid: Previous hidden state; last dim must match ``Whh``'s second dim.

    Returns:
        The hidden state for the current time step.
    """
    gate_x = torch.matmul(x, Wih.T) + Bih
    gate_h = torch.matmul(hid, Whh.T) + Bhh
    # The fused projections are laid out as [reset | update | new].
    i_r, i_i, i_n = gate_x.chunk(3, -1)
    h_r, h_i, h_n = gate_h.chunk(3, -1)
    resetgate = torch.sigmoid(i_r + h_r)
    inputgate = torch.sigmoid(i_i + h_i)  # the "update" gate z_t
    # The reset gate scales only the hidden contribution of the candidate.
    newgate = torch.tanh(i_n + (resetgate * h_n))
    # Same as (1 - z) * n + z * h, written with one fewer multiplication.
    hy = newgate + inputgate * (hid - newgate)
    return hy


# One zero-initialised hidden state per layer, sized by hidden_size (not by
# the input width) so the script still works when the two differ.
states = [
    torch.zeros(inputs.size(0), hidden_size) for _ in range(num_layers)
]
out = torch.zeros(inputs.size(0), inputs.size(1), hidden_size)
for i in range(inputs.size(1)):
    layer_in = inputs[:, i, :]
    for layer in range(num_layers):
        # Each layer consumes the output of the layer below at this step
        # together with its own hidden state from the previous step.
        states[layer] = gruFun(
            weight[f"weight_ih_l{layer}"], weight[f"bias_ih_l{layer}"],
            weight[f"weight_hh_l{layer}"], weight[f"bias_hh_l{layer}"],
            layer_in, states[layer],
        )
        layer_in = states[layer]
    # The module's ``output`` holds the top layer's hidden state per step.
    out[:, i, :] = layer_in

print("output:\n", output)
print("out:\n", out)

数据结果

output:tensor([[[ 0.1819,  0.0716, -0.0427,  0.1667, -0.1247, -0.0144,  0.0262,0.1758,  0.0890,  0.1099],[ 0.2991,  0.1910, -0.0414,  0.2278, -0.2381,  0.0801, -0.0465,0.2214,  0.2028,  0.1974]]], grad_fn=<TransposeBackward1>)
out:tensor([[[ 0.1819,  0.0716, -0.0427,  0.1667, -0.1247, -0.0144,  0.0262,0.1758,  0.0890,  0.1099],[ 0.2991,  0.1910, -0.0414,  0.2278, -0.2381,  0.0801, -0.0465,0.2214,  0.2028,  0.1974]]])