TensorFlow: Movie Rating Prediction
Revision as of 10:24, 30 July 2019 by Onnowpurbo (talk | contribs)
Download
https://github.com/fivethirtyeight/data/blob/master/fandango/fandango_score_comparison.csv https://github.com/fivethirtyeight/data/blob/master/fandango/fandango_scrape.csv
Source
import math

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import tensorflow as tf
from scipy import stats
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Load the data
# ---------------------------------------------------------------------------
# NOTE: uses the TensorFlow 1.x graph API (tf.placeholder / tf.Session).
df = pd.read_csv('fandango_score_comparison.csv')
print(df.head())

# Rename the misspelled "Metacritic_user_nom" column so that it follows the
# same "*_norm" naming as the other normalised rating columns.
df.rename(columns={'Metacritic_user_nom': 'Metacritic_user_norm'}, inplace=True)

rankings_lst = ['Fandango_Stars',
                'RT_user_norm',
                'RT_norm',
                'IMDB_norm',
                'Metacritic_user_norm',
                'Metacritic_norm']


def my_heatmap(df):
    """Display an annotated seaborn heatmap of ``df``, then close the figure.

    Args:
        df: 2-D data accepted by ``seaborn.heatmap``; this script passes
            Pearson correlation matrices.
    """
    fig, axes = plt.subplots()
    sns.heatmap(df, annot=True, ax=axes)
    plt.show()
    plt.close(fig)


# Correlation between all rating sources, first for every movie and then only
# for movies whose normalised Rotten Tomatoes score is at least 4.
my_heatmap(df[rankings_lst].corr(method='pearson'))

RT_lst = df['RT_norm'] >= 4.
my_heatmap(df[RT_lst][rankings_lst].corr(method='pearson'))

# ---------------------------------------------------------------------------
# Build the train / test matrices (target: IMDB_norm)
# ---------------------------------------------------------------------------
feature_cols = ['Fandango_Stars',
                'RT_user_norm',
                'RT_norm',
                'Metacritic_user_norm',
                'Metacritic_norm']
X = df.loc[:, feature_cols]
y = df['IMDB_norm']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.50, random_state=43)

# One extra input dimension for the constant "independent" column.
dim = len(feature_cols) + 1

# A scalar is broadcast to every row, so each split gets a column of ones of
# exactly its own length (the original sized the test column from y_train).
X_train = X_train.assign(independent=1)
X_test = X_test.assign(independent=1)

# DataFrame.as_matrix() was removed in pandas 1.0; .values is equivalent.
P_train = X_train.values
P_test = X_test.values

q_train = np.array(y_train.values).reshape(-1, 1)
q_test = np.array(y_test.values).reshape(-1, 1)

# ---------------------------------------------------------------------------
# Linear model: q_ = P @ T + bias
# ---------------------------------------------------------------------------
P = tf.placeholder(tf.float32, [None, dim])
q = tf.placeholder(tf.float32, [None, 1])
T = tf.Variable(tf.ones([dim, 1]))
# Shape [1] keeps q_ at [None, 1]; a [dim]-shaped bias would broadcast the
# prediction into dim duplicated columns.
bias = tf.Variable(tf.constant(1.0, shape=[1]))
q_ = tf.add(tf.matmul(P, T), bias)

cost = tf.reduce_mean(tf.square(q_ - q))
learning_rate = 0.0001
training_op = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cost)

init_op = tf.global_variables_initializer()

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
training_epochs = 50000

# Pre-sized histories: entry / column i holds the state after epoch i.
# (Growing them with np.append would re-copy the arrays on every epoch.)
cost_history = np.empty(shape=[training_epochs], dtype=float)
t_history = np.empty(shape=[dim, training_epochs], dtype=float)

with tf.Session() as sess:
    sess.run(init_op)
    train_feed = {P: P_train, q: q_train}

    for epoch in range(training_epochs):
        # Apply the update first, then read the post-update cost and weights.
        sess.run(training_op, feed_dict=train_feed)
        epoch_cost, epoch_weights = sess.run([cost, T], feed_dict=train_feed)
        cost_history[epoch] = epoch_cost
        t_history[:, epoch] = epoch_weights[:, 0]

    q_pred = sess.run(q_, feed_dict={P: P_test})[:, 0]

# ---------------------------------------------------------------------------
# Evaluation on the held-out half
# ---------------------------------------------------------------------------
# Both operands must be 1-D: subtracting the (N, 1) q_test from the (N,)
# q_pred would broadcast to (N, N) and average over all mismatched pairs.
mse = np.mean(np.square(q_pred - q_test[:, 0]))
mse_temp = float(mse)

print(mse_temp)
RMSE = math.sqrt(mse_temp)
print(RMSE)