RCNN for Text Classification

Model

Paper: Recurrent Convolutional Neural Networks for Text Classification (Lai et al., AAAI 2015)

Word Representation Learning

The context vector to the left of word w_i, denoted c_l(w_i), is computed recursively from the previous word:
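This is Equation (1) of the paper, which get_context_left in the code below implements with ReLU as f:

$$
c_l(w_i) = f\big(W^{(l)} c_l(w_{i-1}) + W^{(sl)} e(w_{i-1})\big)
$$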

The context vector to the right of word w_i, denoted c_r(w_i), is computed recursively from the following word:
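This is Equation (2) of the paper, implemented by get_context_right below:

$$
c_r(w_i) = f\big(W^{(r)} c_r(w_{i+1}) + W^{(sr)} e(w_{i+1})\big)
$$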

The parameter e(w_{i-1}) denotes the word embedding vector of word w_{i-1}.

The function f is a non-linear activation function.

The left-side context vector of the first word of any document is represented by the same shared parameter c_l(w_1).

The right-side context vector of the last word of any document is represented by the same shared parameter c_r(w_n).

(Figure rcnn-1: structure of the RCNN model.)

The example sentence used in the figure is: "A sunset stroll along the South Bank affords an array of stunning vantage points."

Here e(w_i) denotes the embedding vector of word w_i.

Here y_i^(2) is the latent semantic vector of word w_i.

The left and right context information, c_l(w_i) and c_r(w_i), is obtained by scanning the text with a recurrent neural network.
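The full word representation and the latent semantic vector are given by Equations (3) and (4) of the paper; conv_layer_with_recurrent_structure below concatenates the three parts and applies the tanh transformation:

$$
x_i = \big[c_l(w_i); e(w_i); c_r(w_i)\big], \qquad
y_i^{(2)} = \tanh\big(W^{(2)} x_i + b^{(2)}\big)
$$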

Text Representation Learning

Max-pooling layer:
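Equation (5) of the paper takes the maximum over the latent semantic vectors of all n words (tf.reduce_max over the time axis in the code):

$$
y^{(3)} = \max_{i=1}^{n} y_i^{(2)}
$$

where the max is applied element by element.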

Purpose:

  1. Convert texts of varying lengths into a fixed-length vector.
  2. Capture the latent semantic information that best represents the meaning of the text.

Output layer:
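Equation (6), a linear transformation of the pooled text representation:

$$
y^{(4)} = W^{(4)} y^{(3)} + b^{(4)}
$$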

Softmax layer:
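Equation (7), which converts y^(4) into class probabilities:

$$
p_i = \frac{\exp\big(y_i^{(4)}\big)}{\sum_{k} \exp\big(y_k^{(4)}\big)}
$$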

Reproducing the Paper

import copy

import tensorflow as tf


class TextRCNN(object):
    def __init__(self,
                 num_classes, learning_rate,
                 decay_steps, decay_rate,
                 sequence_length, vocab_size,
                 embed_size, is_training,
                 batch_size, initializer=tf.random_normal_initializer(stddev=0.1)):
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.hidden_size = embed_size
        self.is_training = is_training
        self.learning_rate = learning_rate
        self.initializer = initializer

        # add placeholders
        self.input_x = tf.placeholder(tf.int32, [None, self.sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.int32, [None, self.num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
        self.decay_steps, self.decay_rate = decay_steps, decay_rate

        # initialize weights
        self.instantiate_weights()
        self.logits = self.inference()  # [None, self.num_classes]
        if not is_training:
            return

        self.loss_val = self.loss()
        self.train_op = self.train()
        self.predictions = tf.argmax(self.logits, axis=1, name="predictions")
        correct_prediction = tf.equal(tf.cast(self.predictions, tf.int64), tf.argmax(self.input_y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="Accuracy")

    def instantiate_weights(self):
        with tf.name_scope("weights"):  # embedding matrix
            self.Embedding = tf.get_variable("Embedding", shape=[self.vocab_size, self.embed_size], initializer=self.initializer)
            self.left_side_first_word = tf.get_variable("left_side_first_word", shape=[self.batch_size, self.embed_size], initializer=self.initializer)
            self.right_side_last_word = tf.get_variable("right_side_last_word", shape=[self.batch_size, self.embed_size], initializer=self.initializer)

            self.W_l = tf.get_variable("W_l", shape=[self.embed_size, self.embed_size], initializer=self.initializer)
            self.W_r = tf.get_variable("W_r", shape=[self.embed_size, self.embed_size], initializer=self.initializer)

            self.W_sl = tf.get_variable("W_sl", shape=[self.embed_size, self.embed_size], initializer=self.initializer)
            self.W_sr = tf.get_variable("W_sr", shape=[self.embed_size, self.embed_size], initializer=self.initializer)
            self.b = tf.get_variable("b", [self.embed_size])

            self.W2 = tf.get_variable("W2", shape=[self.hidden_size * 3, self.hidden_size * 3], initializer=self.initializer)
            self.b2 = tf.get_variable("b2", shape=[self.hidden_size * 3])

            self.W_projection = tf.get_variable("W_projection", shape=[self.hidden_size * 3, self.num_classes], initializer=self.initializer)
            self.b_projection = tf.get_variable("b_projection", shape=[self.num_classes])

    def get_context_left(self, context_left, embedding_previous):
        """
        Equation (1)
        :param context_left: c_l(w_{i-1}), [None, embed_size]
        :param embedding_previous: e(w_{i-1}), [None, embed_size]
        :return: c_l(w_i), [None, embed_size]
        """
        left_c = tf.matmul(context_left, self.W_l)  # [batch_size, embed_size]
        left_e = tf.matmul(embedding_previous, self.W_sl)
        left_h = left_c + left_e
        context_left = tf.nn.relu(tf.nn.bias_add(left_h, self.b), "relu")
        return context_left

    def get_context_right(self, context_right, embedding_afterward):
        """
        Equation (2)
        :param context_right: c_r(w_{i+1}), [None, embed_size]
        :param embedding_afterward: e(w_{i+1}), [None, embed_size]
        :return: c_r(w_i), [None, embed_size]
        """
        right_c = tf.matmul(context_right, self.W_r)
        right_e = tf.matmul(embedding_afterward, self.W_sr)
        right_h = right_c + right_e
        context_right = tf.nn.relu(tf.nn.bias_add(right_h, self.b), "relu")
        return context_right

    def conv_layer_with_recurrent_structure(self):
        """
        Equations (3) and (4)
        input: self.embedded_words, [None, sentence_length, embed_size]
        :return: [None, sentence_length, embed_size*3]
        """
        # 1. split the embedded sentence into a list of per-word embeddings
        embedded_words_split = tf.split(self.embedded_words, self.sequence_length, axis=1)
        embedded_words_squeezed = [tf.squeeze(x, axis=1) for x in embedded_words_split]
        embedding_previous = self.left_side_first_word
        context_left_previous = tf.zeros((self.batch_size, self.embed_size))

        # 2. get the list of left contexts (forward scan)
        context_left_list = []
        for i, current_embedding_word in enumerate(embedded_words_squeezed):
            context_left = self.get_context_left(context_left_previous, embedding_previous)
            context_left_list.append(context_left)
            embedding_previous = current_embedding_word
            context_left_previous = context_left

        # 3. get the list of right contexts (backward scan)
        embedded_words_squeezed2 = copy.copy(embedded_words_squeezed)
        embedded_words_squeezed2.reverse()
        embedding_afterward = self.right_side_last_word
        context_right_afterward = tf.zeros((self.batch_size, self.embed_size))
        context_right_list = []
        for j, current_embedding_word in enumerate(embedded_words_squeezed2):
            context_right = self.get_context_right(context_right_afterward, embedding_afterward)
            context_right_list.append(context_right)
            embedding_afterward = current_embedding_word
            context_right_afterward = context_right
        context_right_list.reverse()  # restore sentence order so index i matches word w_i

        # 4. concatenate "left context, embedding, right context" for every word
        output_list = []
        for index, current_embedding_word in enumerate(embedded_words_squeezed):
            representation = tf.concat([context_left_list[index], current_embedding_word,
                                        context_right_list[index]], axis=1)
            # Equation (4)
            representation = tf.nn.tanh(tf.matmul(representation, self.W2) + self.b2)
            output_list.append(representation)

        # 5. stack the list back into a [None, sentence_length, embed_size*3] tensor
        output = tf.stack(output_list, axis=1)
        return output

    def inference(self):
        """embedding -> recurrent-structure convolutional layer -> max-pooling -> linear projection"""
        # 1. get the embeddings of the words in the sentence
        self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)
        # 2. recurrent-structure convolutional layer, Equations (1)-(4)
        output_conv = self.conv_layer_with_recurrent_structure()  # [None, sentence_length, embed_size*3]
        # 3. max-pooling, Equation (5)
        output_pooling = tf.reduce_max(output_conv, axis=1)  # shape: [None, embed_size*3]
        # 4. logits (linear layer)
        with tf.name_scope("dropout"):
            h_drop = tf.nn.dropout(output_pooling, keep_prob=self.dropout_keep_prob)
        with tf.name_scope("output"):
            # Equation (6)
            logits = tf.matmul(h_drop, self.W_projection) + self.b_projection

        return logits

    def loss(self, l2_lambda=0.0001):
        with tf.name_scope("loss"):
            # cross-entropy between the one-hot labels and the logits
            # (cast the int32 one-hot labels to float for the cross-entropy op)
            losses = tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(self.input_y, tf.float32),
                                                             logits=self.logits)
            loss = tf.reduce_mean(losses)
            # L2 regularization over all non-bias trainable variables
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                                  if 'bias' not in v.name]) * l2_lambda
            loss = loss + l2_losses
        return loss

    def train(self):
        learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step,
                                                   self.decay_steps, self.decay_rate, staircase=True)
        train_op = tf.contrib.layers.optimize_loss(self.loss_val, global_step=self.global_step,
                                                   learning_rate=learning_rate, optimizer="Adam")
        return train_op
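
A minimal smoke-test sketch for the class above, assuming TensorFlow 1.x and NumPy; the hyper-parameter values and random inputs are illustrative only, not settings from the paper:

import numpy as np
import tensorflow as tf

# illustrative hyper-parameters (not taken from the paper)
model = TextRCNN(num_classes=5, learning_rate=0.001,
                 decay_steps=1000, decay_rate=0.9,
                 sequence_length=30, vocab_size=10000,
                 embed_size=100, is_training=True, batch_size=8)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # fake token ids and one-hot labels; the fed batch must match batch_size (8)
    x = np.random.randint(0, 10000, size=(8, 30)).astype(np.int32)
    y = np.eye(5, dtype=np.int32)[np.random.randint(0, 5, size=8)]
    loss, _, acc = sess.run([model.loss_val, model.train_op, model.accuracy],
                            feed_dict={model.input_x: x, model.input_y: y,
                                       model.dropout_keep_prob: 0.5})
    print("loss: %.4f, accuracy: %.4f" % (loss, acc))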

References