Let’s do this problem today
Let's dive right in.
So let’s just do what we did in the last one
Here's the code:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt

# load the data
data = pd.read_csv('task2_data.csv')
data.head()

# mask for the abnormal samples (y == 1)
mask = data.loc[:, 'y'] == 1
print(mask)

# visualize the raw data
fig1 = plt.figure()
plt.ylabel('pay2')
plt.xlabel('pay1')
plt.title('pay1_pay2')
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.show()

# features and labels
x = data.drop(['y'], axis=1)
y = data.loc[:, 'y']
x.head()
y.head()

# build and train the model
LR1 = LogisticRegression()
LR1.fit(x, y)

# predict and evaluate
y_predict = LR1.predict(x)
print(y_predict)
accuracy = accuracy_score(y, y_predict)
print(accuracy)

# model parameters
theta0 = LR1.intercept_[0]
theta1, theta2 = LR1.coef_[0][0], LR1.coef_[0][1]
print(theta0, theta1, theta2)

# decision boundary: theta0 + theta1*x1 + theta2*x2 = 0  =>  x2 = -(theta0 + theta1*x1) / theta2
x1 = data.loc[:, 'pay1']
x2_new = -(theta0 + theta1 * x1) / theta2
print(x2_new)

# plot the data together with the decision boundary
fig2 = plt.figure()
plt.ylabel('pay2')
plt.xlabel('pay1')
plt.title('pay1_pay2')
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.plot(x1, x2_new)
plt.show()
Everything seems to go smoothly until the accuracy comes out at only about 80% and the decision boundary gets drawn.
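For reference, the straight line in that second plot is just the set of points where the model's score is zero: theta0 + theta1*pay1 + theta2*pay2 = 0, which rearranges to pay2 = -(theta0 + theta1*pay1) / theta2. That is exactly what the x2_new line in the code computes.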
Why does it look like this? Something always feels a bit off.
Looking at the plot, the way this line classifies the points is not unreasonable, but it does feel a bit forced. Maybe a quadratic function would be a better fit.
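One aside before rewriting everything: in the code below, the squared and cross terms (pay1^2, pay2^2, pay1*pay2) are constructed by hand. As an alternative (my own sketch, not part of the original walkthrough, assuming the same task2_data.csv), scikit-learn's PolynomialFeatures can generate the original features plus all degree-2 terms automatically:

import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data = pd.read_csv('task2_data.csv')   # same file as above (assumption)
x = data.drop(['y'], axis=1)           # pay1, pay2
y = data.loc[:, 'y']

# degree-2 expansion: pay1, pay2, pay1^2, pay1*pay2, pay2^2
poly = PolynomialFeatures(degree=2, include_bias=False)
x_poly = poly.fit_transform(x)

model = LogisticRegression()
model.fit(x_poly, y)
print(accuracy_score(y, model.predict(x_poly)))

Either way, the model itself stays a plain logistic regression; only the input features change.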
So let's add the quadratic terms and write the code again:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt

# load the data
data = pd.read_csv('task2_data.csv')
data.head()
mask = data.loc[:, 'y'] == 1
print(mask)

# visualize the raw data
fig1 = plt.figure()
plt.ylabel('pay2')
plt.xlabel('pay1')
plt.title('pay1_pay2')
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.show()

# features and labels
x = data.drop(['y'], axis=1)
y = data.loc[:, 'y']

# first model: linear decision boundary
LR1 = LogisticRegression()
LR1.fit(x, y)
y_predict = LR1.predict(x)
accuracy = accuracy_score(y, y_predict)
print(accuracy)

theta0 = LR1.intercept_[0]
theta1, theta2 = LR1.coef_[0][0], LR1.coef_[0][1]
x1 = data.loc[:, 'pay1']
x2_new = -(theta0 + theta1 * x1) / theta2

fig2 = plt.figure()
plt.ylabel('pay2')
plt.xlabel('pay1')
plt.title('pay1_pay2')
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.plot(x1, x2_new)
plt.show()

# construct the quadratic terms
x2 = data.loc[:, 'pay2']
x1_2 = x1 * x1
x2_2 = x2 * x2
x1_x2 = x1 * x2
print(x1_2.shape, x2_2.shape, x1_x2.shape)

# build the feature set for the second-order boundary
x_new = {'x1': x1, 'x2': x2, 'x1_2': x1_2, 'x2_2': x2_2, 'x1_x2': x1_x2}
x_new = pd.DataFrame(x_new)
print(x_new)
print(y)

# second model: quadratic decision boundary
LR2 = LogisticRegression()
LR2.fit(x_new, y)
y_predict2 = LR2.predict(x_new)
accuracy2 = accuracy_score(y, y_predict2)
print(accuracy2)

# predict a single sample (pay1=80, pay2=20) with both models
x_test = np.array([[80, 20]])
y_predict1_test = LR1.predict(x_test)
print('abnormal' if y_predict1_test == 1 else 'normal')

x_test_2 = np.array([[80, 20, 6400, 400, 1600]])
y_predict2_test = LR2.predict(x_test_2)
print('abnormal' if y_predict2_test == 1 else 'normal')

# calculate the decision boundary curve
"""
theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2 = 0
treated as a quadratic in x2: a*x2^2 + b*x2 + c = 0, with
a = theta4
b = theta5*x1 + theta2
c = theta0 + theta1*x1 + theta3*x1^2
"""
x1_new_2 = x1.sort_values()
theta0 = LR2.intercept_[0]
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0][0], LR2.coef_[0][1], \
    LR2.coef_[0][2], LR2.coef_[0][3], LR2.coef_[0][4]
print(theta0, theta1, theta2, theta3, theta4, theta5)
a = theta4
b = theta5 * x1_new_2 + theta2
c = theta0 + theta1 * x1_new_2 + theta3 * x1_new_2 * x1_new_2
x2_new_2 = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)
print(x2_new_2)

# plot the data with the quadratic decision boundary
fig3 = plt.figure()
plt.ylabel('pay2')
plt.xlabel('pay1')
plt.title('pay1_pay2')
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.plot(x1_new_2, x2_new_2)
plt.show()
Take a look at the plot at the end.
This time the curved boundary makes much more sense.
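As a closing cross-check (my own sketch, not part of the original walkthrough), the same boundary can be drawn without solving the quadratic by hand: reuse LR2, data, mask, x1 and x2 from the block above, predict over a grid of (pay1, pay2) values, and trace where the predicted class flips.

# evaluate the trained quadratic model on a grid and trace the class boundary
p1 = np.linspace(x1.min(), x1.max(), 200)
p2 = np.linspace(x2.min(), x2.max(), 200)
g1, g2 = np.meshgrid(p1, p2)
grid = pd.DataFrame({'x1': g1.ravel(), 'x2': g2.ravel(),
                     'x1_2': g1.ravel() ** 2, 'x2_2': g2.ravel() ** 2,
                     'x1_x2': g1.ravel() * g2.ravel()})
z = LR2.predict(grid).reshape(g1.shape)

fig4 = plt.figure()
plt.contour(g1, g2, z, levels=[0.5])   # where the prediction flips between 0 and 1
abnormal = plt.scatter(data.loc[:, 'pay1'][mask], data.loc[:, 'pay2'][mask])
normal = plt.scatter(data.loc[:, 'pay1'][~mask], data.loc[:, 'pay2'][~mask])
plt.legend((normal, abnormal), ('normal', 'abnormal'))
plt.xlabel('pay1')
plt.ylabel('pay2')
plt.title('pay1_pay2')
plt.show()

If this contour lines up with the curve from the quadratic-formula calculation above, the boundary was solved correctly.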