# 25 January 2020

# Views: 10

def mining():
    """Run the decision-tree / PSO / bagging experiment and render the results.

    Loads ``dataset/wbc.csv`` (original data) and ``dataset/wbcnew.csv``
    (the same data without missing values), then evaluates three models on
    an 80/20 split of the latter (``random_state=5`` everywhere):

    * an entropy-criterion decision tree (called "C4.5" in this file) on all
      features,
    * a bagging ensemble of that tree on all features,
    * a bagging ensemble of that tree on the features selected by binary PSO
      (the "proposed method").

    Returns:
        Whatever ``render_template('mining.html', ...)`` returns.
        ``render_template`` is not defined in this block; it is expected to
        be in scope at module level (presumably Flask's -- confirm).
    """
    # ---------------------- Data mining process: start ----------------------
    # Modules used by the experiment, imported when the view runs.
    import pandas as pd
    import numpy as np
    import pyswarms as ps
    # Specific names taken from scikit-learn.
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import BaggingClassifier
    from sklearn import metrics

    df1 = pd.read_csv('dataset/wbc.csv')     # original WBC (Original) dataset
    df2 = pd.read_csv('dataset/wbcnew.csv')  # WBC (Original) without missing values
    n1 = len(df1)  # number of instances in wbc.csv
    n2 = len(df2)  # number of instances in wbcnew.csv

    # The axis is passed by keyword: the positional form drop(labels, 1)
    # is no longer accepted by pandas 2.x.
    a = np.array(df1.drop(['Class', 'id'], axis=1))  # attributes of the original data
    b = np.array(df1['Class'])                       # class labels of the original data
    X = np.array(df2.drop(['Class', 'id'], axis=1))  # attributes of the cleaned data
    y = np.array(df2['Class'])                       # class labels of the cleaned data
    n_feature = X.shape[1]

    # Decision tree used as the C4.5 learner.
    classifier = DecisionTreeClassifier(criterion="entropy", random_state=5)

    # --------------------------- PSO process: start --------------------------
    def f_per_particle(m, alpha):
        """Return the cost of one particle.

        Args:
            m: binary mask over the feature columns of ``X``.
            alpha: weight of the classification error against the
                feature-count term.

        Returns:
            ``alpha * (1 - accuracy) + (1 - alpha) * (1 - selected / total)``
            where accuracy is measured on the 20% hold-out split.
        """
        # An empty mask cannot be trained on, so it is scored as "all features".
        if np.count_nonzero(m) == 0:
            X_subset = X
        else:
            X_subset = X[:, m == 1]

        # Train on the masked features and measure hold-out accuracy.
        X_trainsubset, X_testsubset, y_trainsubset, y_testsubset = train_test_split(
            X_subset, y, test_size=0.2, random_state=5)
        classifier.fit(X_trainsubset, y_trainsubset)
        P = (classifier.predict(X_testsubset) == y_testsubset).mean()

        # Objective value minimised by the optimizer.
        j = (alpha * (1.0 - P)
             + (1.0 - alpha) * (1 - (X_subset.shape[1] / n_feature)))
        return j

    def f(x, alpha=0.88):
        """Return an array with the cost of every particle (row) of ``x``."""
        return np.array([f_per_particle(particle, alpha) for particle in x])

    # Swarm hyper-parameters (arbitrary initial choice).
    options = {'c1': 0.5, 'c2': 0.5, 'w': 0.72, 'k': 30, 'p': 3}

    # One dimension per feature.
    optimizer = ps.discrete.BinaryPSO(n_particles=50, dimensions=n_feature, options=options)
    cost, pos = optimizer.optimize(f, iters=100)

    # Fresh, unfitted tree for the final models.
    classifier = DecisionTreeClassifier(criterion="entropy", random_state=5)

    # Selected features from the best position. The cost function scores an
    # all-zero mask as "all features", so apply the same fallback here instead
    # of training on a zero-column matrix.
    selected = np.asarray(pos) == 1
    if not selected.any():
        selected = np.ones(n_feature, dtype=bool)
    X_selected_features = X[:, selected]

    attr = ["Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion",
            "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses"]
    attrSelected = pd.DataFrame(np.array(attr)[selected])  # names of the selected attributes
    nAttr = attrSelected.shape[0]
    dataPSO = pd.DataFrame(X_selected_features)  # data frame of the selected features
    n3 = len(dataPSO)                            # number of instances with selected features

    # Train/test split of the PSO-selected features.
    X_train, X_test, y_train, y_test = train_test_split(
        X_selected_features, y, test_size=0.2, random_state=5)
    # --------------------------- PSO process: end ----------------------------

    # ---------------------- Bagging process and results ----------------------
    # The base estimator is passed positionally because its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn 1.2.
    bag_model = BaggingClassifier(classifier, n_estimators=10, bootstrap=True, random_state=5)
    bag_model.fit(X_train, y_train)  # proposed method: tree + PSO + bagging

    # Baselines use every feature with the same split parameters.
    X_trainsubset, X_testsubset, y_trainsubset, y_testsubset = train_test_split(
        X, y, test_size=0.2, random_state=5)
    classifier.fit(X_trainsubset, y_trainsubset)
    bagmod = BaggingClassifier(classifier, n_estimators=10, bootstrap=True, random_state=5)
    bagmod.fit(X_trainsubset, y_trainsubset)

    y_pred = classifier.predict(X_testsubset)                  # plain tree predictions
    Akurasic45 = metrics.accuracy_score(y_testsubset, y_pred)  # accuracy of the plain tree
    Akurasic45bag = bagmod.score(X_testsubset, y_testsubset)   # accuracy of tree + bagging
    Akurasipurposed = bag_model.score(X_test, y_test)          # accuracy of the proposed method

    # Accuracies as percentages rounded to two decimals.
    c45 = np.around(Akurasic45 * 100, decimals=2)
    c45bag = np.around(Akurasic45bag * 100, decimals=2)
    c45psobag = np.around(Akurasipurposed * 100, decimals=2)
    # ----------------------- Data mining process: end ------------------------
    return render_template('mining.html', Akurasic45bag=Akurasic45bag, c45bag=c45bag, Akurasic45=Akurasic45, c45=c45,
                           Akurasipurposed=Akurasipurposed, c45psobag=c45psobag, df1=df1, X=X, y=y, n1=n1, df2=df2,
                           n2=n2, a=a, b=b, dataPSO=dataPSO, n3=n3, nAttr=nAttr, attrSelected=attrSelected)


# Disable Third Party Ads