import math

import torch
import torch.nn as nn


class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 dropout, pad_idx):
        super(TextClassifier, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        # Bidirectional GRU layers
        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=True,
                          dropout=dropout,
                          batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # Multiply by 2 for bidirection
        # Dropout layer
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        embedded = self.dropout(self.embedding(text))
        # Pack sequence (text_lengths must live on the CPU)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.rnn(packed_embedded)
        # Unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        # Pass the entire output tensor to the FC layer for token-level classification
        return self.fc(output)
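
# --- Usage sketch (not from the original code) ---
# A minimal, hypothetical check that the GRU classifier runs and returns
# token-level logits; every hyperparameter and tensor shape below is
# illustrative only.
def _demo_text_classifier():
    model = TextClassifier(vocab_size=1000, embedding_dim=64, hidden_dim=128,
                           output_dim=5, n_layers=2, dropout=0.3, pad_idx=0)
    dummy_text = torch.randint(1, 1000, (4, 12))   # (batch_size, seq_len)
    dummy_lengths = torch.tensor([12, 10, 7, 5])   # true length of each sequence
    logits = model(dummy_text, dummy_lengths)      # (batch_size, seq_len, output_dim)
    assert logits.shape == (4, 12, 5)
    return logits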

class LSTMTextClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 dropout, pad_idx):
        super(LSTMTextClassifier, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        # LSTM layers
        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=True,
                           dropout=dropout,
                           batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # Multiply by 2 for bidirection
        # Dropout layer
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        embedded = self.dropout(self.embedding(text))
        # Pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths, batch_first=True, enforce_sorted=False)
        # Note: LSTM returns both the output and a tuple of (hidden state, cell state)
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # Unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        # Pass the entire output tensor to the FC layer for token-level classification
        return self.fc(output)
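
# --- Optional variant (assumption, not in the original code) ---
# For a single label per sequence instead of token-level outputs, a common
# pattern is to concatenate the last forward and backward hidden states
# (hidden has shape (num_layers * 2, batch_size, hidden_dim)) and classify that:
#
#     hidden_cat = torch.cat((hidden[-2], hidden[-1]), dim=1)  # (batch, hidden_dim * 2)
#     return self.fc(self.dropout(hidden_cat))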

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=120):
        # max_len is kept for API compatibility; the table is built lazily in forward
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model

    def forward(self, x):
        # Lazily (re)build the sinusoidal table when it is missing or shorter than x
        if not hasattr(self, 'pe') or self.pe.size(1) < x.size(1):
            max_len = x.size(1)
            pe = torch.zeros(max_len, self.d_model)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-math.log(10000.0) / self.d_model))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            pe = pe.unsqueeze(0)  # (1, max_len, d_model), broadcast over the batch
            self.register_buffer('pe', pe.to(x.device))
        return x + self.pe[:, :x.size(1), :]
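
# --- Quick shape check (hypothetical example, not from the original code) ---
# The encoding is added element-wise, so the output keeps the input's
# (batch_size, seq_len, d_model) shape; the sizes below are illustrative only.
def _demo_positional_encoding():
    pos = PositionalEncoding(d_model=32)
    x = torch.zeros(2, 16, 32)      # (batch_size, seq_len, d_model)
    out = pos(x)
    assert out.shape == (2, 16, 32)
    return out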

import torch.nn.init as init


def weights_init_kaiming(m):
    # Kaiming (He) uniform initialization for Linear and Embedding weights
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            init.zeros_(m.bias)
    elif isinstance(m, nn.Embedding):
        init.kaiming_uniform_(m.weight, nonlinearity='relu')
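
# --- Usage sketch (assumption: run once after constructing a model) ---
# Module.apply visits every submodule, so this initializes each nn.Linear and
# nn.Embedding in the network with the Kaiming scheme defined above.
def _init_model_weights(model):
    model.apply(weights_init_kaiming)
    return model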

class TransformerClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, nhead, num_encoder_layers,
                 num_classes, dropout, pad_idx):
        super(TransformerClassifier, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        # Positional encoding (builds its sinusoidal table on the input's device)
        self.pos_encoder = PositionalEncoding(embedding_dim)
        # Transformer encoder with dropout; batch_first matches the (batch, seq, dim)
        # layout produced by the embedding and positional encoding
        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=nhead,
                                                   dropout=dropout, activation="gelu",
                                                   batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        # Classifier with dropout
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(embedding_dim, num_classes)
        )

    def create_attention_mask(self, src, pad_idx):
        # True at padding positions so self-attention ignores them
        return (src == pad_idx)

    def forward(self, src, pad_idx):
        # Check pad_idx
        if isinstance(pad_idx, torch.Tensor) and torch.numel(pad_idx) > 1:
            raise ValueError("Expected pad_idx to be a scalar value, but got a tensor with multiple elements.")
        # Embedding: (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        x = self.embedding(src)
        # Positional encoding
        x = self.pos_encoder(x)
        # Key-padding mask of shape (batch_size, seq_len)
        src_key_padding_mask = self.create_attention_mask(src, pad_idx)
        # Transformer encoder
        x = self.transformer(x, src_key_padding_mask=src_key_padding_mask)
        # Token-level classification: (batch_size, seq_len, num_classes)
        return self.classifier(x)
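
# --- Usage sketch (hypothetical hyperparameters, not from the original code) ---
# embedding_dim must be divisible by nhead; pad_idx is assumed to be 0 here.
def _demo_transformer_classifier():
    model = TransformerClassifier(vocab_size=1000, embedding_dim=64, nhead=4,
                                  num_encoder_layers=2, num_classes=5,
                                  dropout=0.1, pad_idx=0)
    src = torch.randint(1, 1000, (4, 12))   # (batch_size, seq_len)
    logits = model(src, pad_idx=0)          # token-level logits
    assert logits.shape == (4, 12, 5)
    return logits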