请问运行完这个程序大概需要花多长时间?
本人准备学习Python和机器学习,刚刚搭建好环境,从书上抄了一段程序试验一下,无奈运行了一天还没出结果,CPU占用率一直接近100%。请各位帮忙看一下,是我的程序有问题,还是真的还没运行完?正常情况下大概需要多少时间?我的配置是E5-2650,8核16线程,主频好像是2.0 GHz,8 GB内存。
程序如下:
# Load libraries
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import validation_curve
def compute_validation_scores(param_range, cv=3, n_jobs=-1):
    """Cross-validate a random forest on the digits data over ``n_estimators``.

    Args:
        param_range: Sequence of ``n_estimators`` values to evaluate.
        cv: Number of cross-validation folds.
        n_jobs: Number of parallel workers; ``-1`` asks for all cores.

    Returns:
        The ``(train_scores, test_scores)`` pair returned by
        ``validation_curve``. Per the scikit-learn documentation each array
        has one row per parameter value and one column per fold.
    """
    # Load data and split it into feature matrix and target vector
    digits = load_digits()
    features, target = digits.data, digits.target

    # Calculate accuracy on training and test set for every parameter value
    return validation_curve(
        RandomForestClassifier(),
        features,
        target,
        # Hyperparameter to examine and the values to try
        param_name="n_estimators",
        param_range=param_range,
        cv=cv,
        scoring="accuracy",
        n_jobs=n_jobs,
    )


def summarize_scores(scores):
    """Return ``(mean, std)`` of ``scores`` taken along axis 1.

    Args:
        scores: 2-D array-like; each row is reduced to one mean and one
            standard deviation.

    Returns:
        Tuple of two 1-D arrays: row-wise mean and row-wise standard deviation.
    """
    return np.mean(scores, axis=1), np.std(scores, axis=1)


def plot_validation_curve(param_range, train_mean, train_std,
                          test_mean, test_std):
    """Draw the mean training / cross-validation curves with +/- 1 std bands.

    Args:
        param_range: X values (the evaluated numbers of trees).
        train_mean: Mean training score per parameter value.
        train_std: Standard deviation of the training score.
        test_mean: Mean cross-validation score per parameter value.
        test_std: Standard deviation of the cross-validation score.
    """
    # Plot mean accuracy scores for training and test sets
    plt.plot(param_range, train_mean, label="Training score", color="black")
    plt.plot(param_range, test_mean, label="Cross-validation score",
             color="dimgrey")

    # Plot accuracy bands for training and test sets
    plt.fill_between(param_range, train_mean - train_std,
                     train_mean + train_std, color="gray")
    plt.fill_between(param_range, test_mean - test_std,
                     test_mean + test_std, color="gainsboro")

    # Create plot
    plt.title("Validation Curve With Random Forest")
    plt.xlabel("Number Of Trees")
    plt.ylabel("Accuracy Score")
    plt.tight_layout()
    plt.legend(loc="best")


def main():
    """Compute and display the validation curve for the number of trees."""
    # 10 values (1, 26, ..., 226). A finer grid such as np.arange(1, 250, 2)
    # evaluates 125 values and takes correspondingly longer.
    param_range = np.arange(1, 250, 25)

    train_scores, test_scores = compute_validation_scores(param_range)

    # Calculate mean and standard deviation across folds
    train_mean, train_std = summarize_scores(train_scores)
    test_mean, test_std = summarize_scores(test_scores)

    plot_validation_curve(param_range, train_mean, train_std,
                          test_mean, test_std)
    plt.show()


# The guard matters because of n_jobs=-1: when worker processes are started by
# spawning a fresh interpreter (e.g. on Windows) this module may be imported
# again in each worker. Without the guard every import would re-run the whole
# computation and try to start workers of its own.
if __name__ == "__main__":
    main()