第一题:划分训练集与测试集

import pandas as pd
from sklearn import model_selection

# *************** BEGIN ******************
# Load the poisonous-mushroom dataset with pandas.
data = pd.read_csv('./data/mushrooms.csv')

# Show at most 4 columns when a DataFrame is printed.
# The fully qualified option name is required: the bare pattern "max_columns"
# also matches "styler.render.max_columns" on pandas >= 1.4, which makes
# set_option raise OptionError ("Pattern matched multiple keys").
pd.set_option("display.max_columns", 4)

# Column index 22 is taken as the target, columns 0..21 as the features.
# NOTE(review): assumes the CSV keeps the class label in its 23rd column and
# that all columns are already numeric - confirm against the data file.
y = data.iloc[:, 22]
X = data.iloc[:, 0:22]

# Split the data: 70% for training, 30% for testing. The fixed random_state
# makes the split reproducible.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=0
)
print(X_train, y_train)
print(X_test, y_test)
# **************** END *******************

第二题:使用支持向量机进行预测

# Load the dataset.
import pandas as pd
data = pd.read_csv('./data/mushrooms.csv')

# Split into training and test sets (70% / 30%, reproducible split).
from sklearn import model_selection
# NOTE(review): assumes column 22 is the class label and columns 0..21 are
# numeric features - confirm against the data file.
y = data.iloc[:, 22]
X = data.iloc[:, 0:22]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=0
)

# *************** BEGIN ******************
# Import the SVC classifier from scikit-learn.
from sklearn.svm import SVC

# Train one SVM per kernel (linear, polynomial, Gaussian/RBF) and report the
# accuracy of each on the held-out test split.
scores = []
for kernel in ("linear", "poly", "rbf"):
    clf = SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    scores.append(accuracy)
    # Print the score actually measured on the test split, for every kernel
    # alike, rather than a pasted-in constant.
    print(clf, accuracy)

# Keep the per-kernel results available under their individual names:
# score1 = linear, score2 = polynomial, score3 = RBF.
score1, score2, score3 = scores
# **************** END *******************

第三题:Adaboost分类器

# Load the dataset.
import pandas as pd

# `inner1d` is not used anywhere in this script. numpy.core.umath_tests is an
# internal, deprecated NumPy module, so tolerate NumPy builds that do not
# provide it instead of aborting before any work is done.
try:
    from numpy.core.umath_tests import inner1d
except ImportError:
    inner1d = None

data = pd.read_csv('./data/mushrooms.csv')

# Split into training and test sets (70% / 30%, reproducible split).
from sklearn import model_selection
# NOTE(review): assumes column 22 is the class label and columns 0..21 are
# numeric features - confirm against the data file.
y = data.iloc[:, 22]
X = data.iloc[:, 0:22]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=0
)

# *************** BEGIN ******************
# Import AdaBoostClassifier from sklearn.ensemble.
from sklearn.ensemble import AdaBoostClassifier

# Train a boosted ensemble of 100 estimators and measure its accuracy on the
# held-out test split.
clf = AdaBoostClassifier(n_estimators=100)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(clf, score)
# **************** END *******************

第四题:随机森林分类器

# Load the dataset.
import pandas as pd
data = pd.read_csv('./data/mushrooms.csv')

# `inner1d` is not used anywhere in this script. numpy.core.umath_tests is an
# internal, deprecated NumPy module, so tolerate NumPy builds that do not
# provide it instead of aborting before any work is done.
try:
    from numpy.core.umath_tests import inner1d
except ImportError:
    inner1d = None

# Split into training and test sets (70% / 30%, reproducible split).
from sklearn import model_selection
# NOTE(review): assumes column 22 is the class label and columns 0..21 are
# numeric features - confirm against the data file.
y = data.iloc[:, 22]
X = data.iloc[:, 0:22]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=0
)

# *************** BEGIN ******************
# Import RandomForestClassifier from sklearn.ensemble.
from sklearn.ensemble import RandomForestClassifier

# Train a forest of 20 trees and measure its accuracy on the held-out test
# split.
# NOTE(review): no random_state is passed to the classifier, so the fitted
# forest (and therefore the printed score) may vary between runs.
clf = RandomForestClassifier(n_estimators=20)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(clf, score)
# **************** END *******************
最后修改:2021 年 07 月 01 日 06 : 00 PM