Source code: github.com/yangjinghit…

import pandas as pd
import numpy as np
data = pd.read_csv('Tweets.csv')  # Twitter US Airline Sentiment dataset

data.head(2)
   tweet_id  airline_sentiment  airline_sentiment_confidence  negativereason  negativereason_confidence  airline  airline_sentiment_gold  name  negativereason_gold  retweet_count  text  tweet_coord  tweet_created  tweet_location  user_timezone
0  570306133677760513  neutral  1.0000  NaN  NaN  Virgin America  NaN  cairdin  NaN  0  @VirginAmerica What @dhepburn said.  NaN  2015-02-24 11:35:52 -0800  NaN  Eastern Time (US & Canada)
1  570301130888122368  positive  0.3486  NaN  0.0  Virgin America  NaN  jnardino  NaN  0  @VirginAmerica plus you’ve added commercials t…  NaN  2015-02-24 11:15:59 -0800  NaN  Pacific Time (US & Canada)
data = data[['airline_sentiment', 'text']]

# Write one tweet per line; note 'a' is append mode, so re-running this cell duplicates lines
with open('twee', 'a', encoding='utf-8') as f:
    for string in data.text:
        f.writelines(string+'\n')
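Since the file written above has one tweet per line, gensim's LineSentence reader (which treats each line as one sentence) is arguably a more natural fit than Text8Corpus, which expects one long space-delimited stream; a sketch:

from gensim.models.word2vec import LineSentence
sentences = LineSentence('twee')  # each line -> one sentence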
from gensim.models import word2vec
# Train 300-dimensional word vectors on the tweet corpus
sentences = word2vec.Text8Corpus("twee")
model = word2vec.Word2Vec(sentences, size=300)
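If you are on gensim >= 4.0, note that the `size` argument was renamed `vector_size` (and `iter` to `epochs`); the equivalent call would be:

model = word2vec.Word2Vec(sentences, vector_size=300)  # gensim >= 4.0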
word_vectors = model.wv
del model
# Represent each tweet as the list of word vectors for its in-vocabulary tokens
data['vec'] = data.text.apply(lambda x: [word_vectors[w] for w in x.split() if w in word_vectors])

# Keep only tweets with more than five in-vocabulary tokens
data = data[data['vec'].apply(lambda x: len(x) > 5)]
data.head(3)
   airline_sentiment  text                                              vec
1  positive           @VirginAmerica plus you’ve added commercials t…  [[2.2402475, 0.15890086, 0.082046695, 0.80472…
2  neutral            @VirginAmerica I didn’t today… Must mean I n…    [[2.2402475, 0.15890086, 0.082046695, 0.80472…
3  negative           @VirginAmerica it’s really aggressive to blast…  [[2.2402475, 0.15890086, 0.082046695, 0.80472…
del data['text']
data.airline_sentiment.unique()
array(['positive', 'neutral', 'negative'], dtype=object)
data.airline_sentiment.value_counts()
negative    9007
neutral     2789
positive    2013
Name: airline_sentiment, dtype: int64
dic = {'neutral': np.array([1, 0, 0]), 'positive': np.array([0, 1, 0]), 'negative': np.array([0, 0, 1])}
data['cat'] = data.airline_sentiment.map(dic)
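The same one-hot encoding could also be produced with pandas directly; note that pd.get_dummies orders columns alphabetically (negative, neutral, positive), which differs from the [neutral, positive, negative] order used above. A sketch:

onehot = pd.get_dummies(data.airline_sentiment)  # columns: negative, neutral, positive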
del data['airline_sentiment']
data.columns
Index(['vec', 'cat'], dtype='object')
data = data.reset_index()
del data['index']
maxlength = max(len(x) for x in data.vec)  # longest remaining tweet, in tokens
maxlength
36
data.head(2)
   vec                                              cat
0  [[2.2402475, 0.15890086, 0.082046695, 0.80472…  [0, 1, 0]
1  [[2.2402475, 0.15890086, 0.082046695, 0.80472…  [1, 0, 0]
def pad(x):
    # Zero-pad each tweet's vector sequence to a fixed (maxlength, 300) matrix
    xl = np.zeros((maxlength, 300))
    xl[:len(x)] = x
    return xl
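Padding at the end gives every tweet the same shape so the whole corpus can be stacked into one array; for example:

pad(data.vec[0]).shape  # (36, 300); rows beyond the tweet's true length stay all-zero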
dataset = data.vec.apply(pad)
dataset.head(2)
0    [[2.2402474880218506, 0.15890085697174072, -0....
1    [[2.2402474880218506, 0.15890085697174072, -0....
Name: vec, dtype: object
len(dataset)
13809
labels = np.concatenate(data.cat).reshape(len(data.cat), -1)

np.shape(labels)
(13809, 3)
data_ = np.concatenate(dataset).reshape(len(dataset), maxlength, 300)  # (N, 36, 300)
np.shape(data_)
(13809, 36, 300)
index = np.random.permutation(len(data))  # shuffle before the train/test split
label = labels[index]
dataset = data_[index]
label_train = label[:12000]
dataset_train = dataset[:12000]
label_test = label[12000:]
dataset_test = dataset[12000:]
import tensorflow as tf
/anaconda3/envs/py35/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of this module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5
/anaconda3/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
learning_rate = 0.005
batch_size = 300
n_input = 300        # word-vector dimension
n_steps = maxlength  # sequence length after padding (36)
n_hidden = 128       # GRU units
n_classes = 3
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
output_keep_prob = tf.placeholder("float")
reg = tf.contrib.layers.l2_regularizer(scale=0.01)

def length(shuju):
    # True (pre-padding) length of each sequence: a timestep counts if any
    # component of its embedding vector is non-zero
    return tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(shuju), reduction_indices=2)), reduction_indices=1)
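The trick is that padded timesteps are all-zero vectors: the max of the absolute values over the embedding axis is 0 for padding and positive otherwise, so sign-then-sum recovers each sequence's true length. The same computation in NumPy, for intuition (a hypothetical batch of one sequence with true length 2):

demo = np.zeros((1, 4, 3))
demo[0, :2] = 1.0
np.sum(np.sign(np.max(np.abs(demo), axis=2)), axis=1)  # array([2.])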
cell = tf.contrib.rnn.DropoutWrapper(
    tf.contrib.rnn.GRUCell(n_hidden,
                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.0001),
                           bias_initializer=tf.truncated_normal_initializer(stddev=0.0001)),
    output_keep_prob=output_keep_prob)

output, _ = tf.nn.dynamic_rnn(
    cell,
    x,
    dtype=tf.float32,
    sequence_length=length(x))
output.get_shape()
TensorShape([Dimension(None), Dimension(36), Dimension(128)])
# Pick each sequence's last valid output: flatten to (batch*steps, hidden) and
# gather row i*n_steps + (length_i - 1) for each example i
index = tf.range(0, batch_size) * n_steps + (tf.cast(length(x), tf.int32) - 1)
flat = tf.reshape(output, [-1, int(output.get_shape()[2])])
last = tf.gather(flat, index)
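An equivalent (and arguably clearer) way to pick each sequence's last valid output is tf.gather_nd, which also avoids hard-coding batch_size and so would work for a final partial batch; a sketch:

lengths = tf.cast(length(x), tf.int32)
idx = tf.stack([tf.range(tf.shape(output)[0]), lengths - 1], axis=1)
last_alt = tf.gather_nd(output, idx)  # shape (batch, n_hidden)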
fc_1 = tf.contrib.layers.fully_connected(
    last,
    64,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
    activation_fn=tf.nn.relu)
keep_prob = tf.placeholder("float")
fc1_drop = tf.nn.dropout(fc_1, keep_prob)
weight = tf.Variable(tf.truncated_normal([64, n_classes], stddev=0.001))
bias = tf.Variable(tf.constant(0.1, shape=[n_classes]))
prediction = tf.nn.softmax(tf.matmul(fc1_drop, weight) + bias)
cross_entropy = -tf.reduce_sum(y * tf.log(prediction))  # summed (not averaged) over the batch
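Applying softmax and then taking the log by hand can underflow once a predicted probability hits zero; TensorFlow's fused op is the numerically stable alternative. A sketch, keeping the raw logits instead of `prediction`:

logits = tf.matmul(fc1_drop, weight) + bias
cross_entropy_stable = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))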
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
tf.contrib.layers.apply_regularization(reg, weights_list=weights)
<tf.Tensor 'get_regularization_penalty:0' shape=() dtype=float32>
reg_ws = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9)
grads = optimizer.compute_gradients(cross_entropy + tf.reduce_sum(reg_ws))
# Clip each gradient tensor to norm 5 before applying the update
for i, (g, v) in enumerate(grads):
    if g is not None:
        grads[i] = (tf.clip_by_norm(g, 5), v)
train_op = optimizer.apply_gradients(grads)
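The loop above clips each gradient tensor to norm 5 independently; a common alternative in RNN training is to clip by the global norm across all gradients, which preserves their relative directions. A sketch of what one could write instead of the loop:

gs, vs = zip(*optimizer.compute_gradients(cross_entropy + tf.reduce_sum(reg_ws)))
clipped, _ = tf.clip_by_global_norm(gs, 5.0)  # None entries pass through unchanged
train_op_alt = optimizer.apply_gradients(zip(clipped, vs))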
/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:97: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
WARNING:tensorflow:From /anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/clip_ops.py:110: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

def generatebatch(X, Y, n_examples, batch_size):
    # Yield consecutive mini-batches; any final partial batch is dropped
    for batch_i in range(n_examples // batch_size):
        start = batch_i*batch_size
        end = start + batch_size
        batch_xs = X[start:end]
        batch_ys = Y[start:end]
        yield batch_xs, batch_ys
sess = tf.Session()

init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver()
for step in range(18):
    # Reshuffle the training set at the start of each epoch
    index_ = np.random.permutation(len(dataset_train))
    dataset_train = dataset_train[index_]
    label_train = label_train[index_]
    for batch_x, batch_y in generatebatch(dataset_train, label_train, len(label_train), batch_size):
        sess.run(train_op, feed_dict={x:batch_x, y:batch_y, keep_prob:0.5, output_keep_prob:0.5})
    acc = sess.run(accuracy, feed_dict={x:batch_x, y:batch_y, keep_prob:1, output_keep_prob:1})
    loss = sess.run(cross_entropy, feed_dict={x:batch_x, y:batch_y, keep_prob:1, output_keep_prob:1})
    saver.save(sess, './lesson0', global_step=step)
    print("Iter" + str(step) + "MiniBatch Loss =" + "{:.6f}".format(loss) + ", Training Accuracy = " + "{:.5f}".format(acc))
print("Optimization Finished!")
Iter0MiniBatch Loss =214.256958, Training Accuracy = 0.66667
Iter1MiniBatch Loss =173.106171, Training Accuracy = 0.76333
Iter2MiniBatch Loss =163.925598, Training Accuracy = 0.80333
Iter3MiniBatch Loss =158.836716, Training Accuracy = 0.77667
Iter4MiniBatch Loss =155.008820, Training Accuracy = 0.79667
Iter5MiniBatch Loss =131.040298, Training Accuracy = 0.83667
Iter6MiniBatch Loss =133.507889, Training Accuracy = 0.80667
Iter7MiniBatch Loss =114.443909, Training Accuracy = 0.86333
Iter8MiniBatch Loss =103.080223, Training Accuracy = 0.86333
Iter9MiniBatch Loss =99.932602, Training Accuracy = 0.90000
Iter10MiniBatch Loss =93.207428, Training Accuracy = 0.86000
Iter11MiniBatch Loss =67.471329, Training Accuracy = 0.93000
Iter12MiniBatch Loss =62.449608, Training Accuracy = 0.92333
Iter13MiniBatch Loss =50.676277, Training Accuracy = 0.93000
Iter14MiniBatch Loss =55.832417, Training Accuracy = 0.92333
Iter15MiniBatch Loss =44.194443, Training Accuracy = 0.96333
Iter16MiniBatch Loss =30.585236, Training Accuracy = 0.95667
Iter17MiniBatch Loss =48.206429, Training Accuracy = 0.94333
Optimization Finished!
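Training accuracy alone says little about generalization, so the held-out split can be scored with the same graph. Because `index` hard-codes batch_size, evaluation must also run in batches of exactly 300 (generatebatch drops the final partial batch). A sketch:

accs = [sess.run(accuracy, feed_dict={x: bx, y: by, keep_prob: 1, output_keep_prob: 1})
        for bx, by in generatebatch(dataset_test, label_test, len(label_test), batch_size)]
print("Test Accuracy = {:.5f}".format(np.mean(accs)))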