머신러닝

1-7 Scikit SVC vs TensorFlow Softmax Classification For Iris Flowers Dataset

coding art 2020. 1. 12. 12:18
728x90

Sklearn SVC는 hyperplane 경계에서 데이터가 혼합되어 있는 경우 중요 파라메터인 γ와 C 값을 조절하여 Overfitting 조절이 가능하다. 따라서 SVC 예제에서처럼 Iris Flowers 데이터를 대상으로 다항식 기법을 적용한 TensorFlow 코드를 사용하여 결과를 비교해 보기로 하자.

 

SVC 루틴을 사용하여 아래와 같은 결과를 얻기 위해서는 150개의 Iris 데이터를 7:3으로 학습용(X_train)과 테스트용(X_test)으로 나누어(split) 처리하기로 하자. 학습용과 테스트용 두 종류로 분리되는 데이터는 반드시 StandardScaler를 사용하여 표준화 처리를 해야 한다. 반면에 TensorFlow에서 코딩된 다항식 기법 적용 시에는 표준화 처리를 할 필요가 없다. SVC와 다항식 기법의 실제 계산 결과는 Setosa를 식별하는 hyperplane의 기울기를 비교해 보면 될 것 같다.


SVC 기법에서 gamma 값이 커지면 Setosa(0)와 Versicolor(1) 식별 결과가 다음의 첫 번째 그림에서처럼 너무 빡빡해지는 경향이 있으며 C 값이 지나치게 커져도 Classification 결과가 자연스럽지는 못하다. rbf (Radial Basis Function) kernel을 사용하는 SVC 알고리듬에서는 gamma 값이 작아야 Vapnik의 soft margin classification 효과가 가능해지며, 한편 linear kernel을 사용하는 SVC 라든지 또는 LogisticRegression Classifier 결과와 비교가 가능하다.


TensorFlow 코딩에서 다음의 결과들은 비선형 Covariance 만을 사용하는 hypothesis를 사용할 경우와 선형 항과 비선형 항을 혼합했을 경우의 특징을 보여준다. Covariance 만을 사용했을 경우에는 hyperbola 형으로 classification 이 강하게 이루어지는 반면에 선형 항이 추가되었을 경우에는 직선성이 강화되어 linear kernel 이나 작은 gamma 값을 가지는 rbf kernel 사용 때와 유사성이 보인다.




#Iris_svc_softmax_01.py

from sklearn import datasets
import numpy as np

# Load the Iris data and keep only columns 2 and 3 (plotted later as
# petal length and petal width).
iris = datasets.load_iris()
y = iris.target
X = iris.data[:, [2, 3]]
print('Class labels:', np.unique(y))

from sklearn.model_selection import train_test_split

# Stratified split holding out 30% for testing; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1, test_size=0.3)


from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training portion only, then apply that same
# transform to both portions.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

#Training a perceptron via scikit-learn
#Redefining the `plot_decision_regions` function from chapter 2:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Draw a classifier's decision regions over a 2-D feature plane.

    Parameters
    ----------
    X : array
        Samples; column 0 is plotted on the x axis and column 1 on the y axis.
    y : array
        Class label for each row of ``X``.
    classifier : object
        Anything exposing ``predict``; it is called once on every grid point.
    test_idx : sequence of row indices, optional
        Rows of ``X`` to circle as the test set. ``None`` or an empty
        sequence skips the highlighting.
    resolution : float
        Step of the evaluation grid along both axes.

    The marker and colour tables hold five entries, so at most five distinct
    labels can be drawn. The function only draws; showing the figure is left
    to the caller.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface on a grid padded by 1 unit around the data
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.8, cmap=cmap)
    plt.grid(True)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # one scatter series per class so each gets its own legend entry
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.3,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    # highlight test samples
    # Explicit None/length check: truth-testing a NumPy index array raises.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]

        # facecolors='none' draws hollow circles; the former c='' is not a
        # valid colour specification in current matplotlib releases.
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    facecolors='none',
                    edgecolor='black',
                    alpha=1.0,
                    linewidth=1,
                    marker='o',
                    s=100,
                    label='test set')


#Maximum margin classification with support vector machines
#Maximum margin intuition
#Dealing with the nonlinearly separable case using slack variables

from sklearn.svm import SVC
from matplotlib.colors import ListedColormap

# RBF-kernel SVC trained on the standardized training features.
# Alternative for comparison: SVC(kernel='linear', C=1.0, random_state=1)
svm = SVC(kernel='rbf', random_state=1, gamma=1, C=10.0)
svm.fit(X_train_std, y_train)

# Training rows first, test rows after them, in both stacked arrays.
X_combined = np.vstack((X_train_std, X_test_std)) #For SVC
Z_combined = np.vstack((X_train, X_test)) #For softmax
print(y_train.shape)
print(y_test.shape)
y_combined = np.hstack((y_train, y_test))

# The test rows occupy the tail of the stacked arrays, so their index range
# follows from the split sizes instead of hard-coded 105/150.
n_train = len(y_train)
plot_decision_regions(X_combined, y_combined,classifier=svm,
                      test_idx=range(n_train, len(y_combined)))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

#The following feeds NumPy arrays into TensorFlow tensors through placeholders
import tensorflow as tf
import time

# Reference timestamp; the time elapsed since this point is printed at the
# very end of the script.
start_time = time.time()

def fn(X,W1,b1,W2,b2):
    """Return the softmax of the element-wise product of two affine maps of X.

    The two affine maps are ``X @ W2 + b2`` and ``X @ W1 + b1``; their product
    is passed through ``tf.nn.softmax``.
    """
    second_affine = tf.matmul(X, W2) + b2
    first_affine = tf.matmul(X, W1) + b1
    # Alternative form mixing a linear term with the product term:
    #   softmax((X @ W0 + b0) + 0.2 * (X @ W2 + b2) * (X @ W1 + b1))
    return tf.nn.softmax(second_affine * first_affine)


def tf_plot_decision_regions(X_xor, y, hypothesis, p1,p2,p3, test_idx=None, resolution=0.02):
    """Plot three thresholded TensorFlow outputs over a 2-D feature plane.

    This function reads two module-level names that must exist when it is
    called: the open session ``sess`` and the input placeholder ``X``.

    Parameters
    ----------
    X_xor : array
        Samples; column 0 is plotted on the x axis and column 1 on the y axis.
    y : array
        Class label for each row of ``X_xor``.
    hypothesis : tensor
        Not evaluated here; kept so existing callers keep working.
    p1, p2, p3 : tensors
        Evaluated on every grid point through ``sess.run``; each result is
        reshaped to the grid and drawn as a filled contour layer.
    test_idx : sequence of row indices, optional
        Rows of ``X_xor`` to circle as the test set. ``None`` or an empty
        sequence skips the highlighting.
    resolution : float
        Step of the evaluation grid along both axes.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluation grid padded by 1 unit around the data
    x1_min, x1_max = X_xor[:, 0].min() - 1, X_xor[:, 0].max() + 1
    x2_min, x2_max = X_xor[:, 1].min() - 1, X_xor[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    XX = np.array([xx1.ravel(), xx2.ravel()]).T
    print(XX.shape)

    # Evaluate the three indicator tensors on the whole grid in one call.
    p, q, r = sess.run([p1, p2, p3], feed_dict={X: XX})

    # One contour layer per indicator, drawn in the original order.
    # NOTE(review): the third layer uses alpha=0.0, i.e. it is fully
    # transparent - confirm whether that is intended.
    for region, layer_alpha in ((p, 0.2), (q, 0.1), (r, 0.0)):
        plt.contourf(xx1, xx2, region.reshape(xx1.shape),
                     alpha=layer_alpha, cmap=cmap)
    plt.grid(True)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # one scatter series per class so each gets its own legend entry
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X_xor[y == cl, 0],
                    y=X_xor[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    # highlight test samples
    # Explicit None/length check: truth-testing a NumPy index array raises.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X_xor[test_idx, :]

        # facecolors='none' draws hollow circles; the former c='' is not a
        # valid colour specification in current matplotlib releases.
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    facecolors='none',
                    edgecolor='black',
                    alpha=1.0,
                    linewidth=1,
                    marker='o',
                    s=100,
                    label='test set')


#Training Data

# One-hot encode the integer class labels (0, 1, 2) as rows of length 3.
# Row counts come from the label arrays themselves, so the encoding keeps
# working if the train/test split ratio is changed.
n_classes = 3
Y_train = np.zeros([len(y_train), n_classes])
Y_train[np.arange(len(y_train)), y_train] = 1.
Y_test = np.zeros([len(y_test), n_classes])
Y_test[np.arange(len(y_test)), y_test] = 1.
print('Y_train=',Y_train.shape)
print('Y_test=',Y_test.shape)


# Cast the training inputs and one-hot targets to float32 before feeding.
X_train = X_train.astype(np.float32)
Y_train = Y_train.astype(np.float32)

# Hyperparameters
learning_rate = 5e-05 # step size intended for the Adam optimizer
training_epochs = 20_000
display_steps = 1_000
# Network dimensions: two input features, three output classes
n_input = 2
dof1 = 3
# Graph input nodes; the leading dimension is left open for any batch size
X = tf.placeholder(tf.float32, shape=[None, n_input])
Y = tf.placeholder(tf.float32, shape=[None, dof1])


def _small_random_variable(shape):
    """Create a variable drawn from a normal distribution with stddev 0.01."""
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


# Weights and biases, created in the same order as before.
W0 = _small_random_variable([n_input, dof1])
b0 = _small_random_variable([dof1])
W1 = _small_random_variable([n_input, dof1])
b1 = _small_random_variable([dof1])
W2 = _small_random_variable([n_input, dof1])
b2 = _small_random_variable([dof1])

# Model output: softmax over the product-form hypothesis built by fn().
hypothesis = fn(X,W1,b1,W2,b2)

# cost/loss function
# Element-wise binary cross-entropy over the three outputs. The probabilities
# are clipped away from exactly 0 and 1 first: a saturated softmax would
# otherwise feed 0 into tf.log and turn the cost into NaN (0 * -inf).
_EPS = 1e-7
clipped_hypothesis = tf.clip_by_value(hypothesis, _EPS, 1.0 - _EPS)
cost = -tf.reduce_mean(Y * tf.log(clipped_hypothesis)
                       + (1 - Y) * tf.log(1 - clipped_hypothesis))
# Plain gradient descent with its own fixed step of 0.002.
# Alternative: tf.train.AdamOptimizer(learning_rate).minimize(cost)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.002).minimize(cost)

# Per-class indicators (probability above 0.5), used for the region plots.
p1 = tf.cast(hypothesis[:, 0] > 0.5, dtype=tf.float32)
p2 = tf.cast(hypothesis[:, 1] > 0.5, dtype=tf.float32)
p3 = tf.cast(hypothesis[:, 2] > 0.5, dtype=tf.float32)
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)

# Accuracy compares the most probable class with the true class. Taking the
# argmax of the thresholded `predicted` would report class 0 for any row in
# which no probability exceeds 0.5.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(hypothesis,1),
                                tf.argmax(Y,1)), dtype=tf.float32))

#Initializing global variables
init = tf.global_variables_initializer()

# The session is closed automatically when the `with` block exits, so no
# explicit sess.close() is needed.
with tf.Session() as sess:
    sess.run(init)

    # Full-batch training: every step feeds the whole training set. Only the
    # update op is run; the cost and weights were fetched but never used.
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict={X: X_train, Y: Y_train})
    print("Optimization Finished!")

    # Accuracy report
    h, p, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: X_train, Y: Y_train})
    print(p.shape)

    # Z_combined holds the raw (unstandardized) features, so the axes are
    # labelled in centimetres rather than "[standardized]".
    tf_plot_decision_regions(Z_combined, y_combined, hypothesis, p1,p2,p3,
                             test_idx=range(len(y_train), len(y_combined)))
    plt.xlabel('petal length [cm]')
    plt.ylabel('petal width [cm]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

# Measured after the plot window is closed, so display time is included.
end_time = time.time()
print( "Completed in ", end_time - start_time , " seconds")