def standardize(data):
    """Standardize each column of ``data`` in place to zero mean and unit spread.

    Every column is shifted by its mean and divided by its population
    standard deviation (``np.std`` with the default ``ddof=0``).

    Args:
        data: A row-indexable 2-D collection (e.g. a list of lists) of
            numbers. It is modified in place.

    Returns:
        The same ``data`` object, after modification.
    """
    # Nothing to scale; also avoids indexing data[0] on an empty input.
    if len(data) == 0:
        return data

    for col in range(len(data[0])):
        column = [row[col] for row in data]
        mean = np.mean(column, axis=0)
        std = np.std(column, axis=0)
        # A constant column has zero spread; dividing by it would fill the
        # column with NaN/inf, so such a column is only centred (becomes 0).
        scale = np.where(std == 0, 1.0, std)
        for row in data:
            row[col] = (row[col] - mean) / scale
    return data
1 2 3 4 5 6 7 8 9 10 11
# Min-max normalize the values of each column separately.
def normalize(data):
    """Rescale each column of ``data`` in place to the range [0, 1].

    Every value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        data: A row-indexable 2-D collection (e.g. a list of lists) of
            numbers. It is modified in place.

    Returns:
        The same ``data`` object, after modification.
    """
    # Nothing to scale; also avoids indexing data[0] on an empty input.
    if len(data) == 0:
        return data

    for col in range(len(data[0])):
        column = [row[col] for row in data]
        mn = min(column)
        mx = max(column)
        span = mx - mn
        for row in data:
            # A constant column has no range; map it to 0.0 instead of
            # dividing by zero.
            row[col] = (row[col] - mn) / span if span != 0 else 0.0
    return data
p=1, preprocess=null The mean squar error of train and test are: 0.03, 0.00 The decision coefficient is: 1.00 p=1, preprocess=standardize The mean squar error of train and test are: 0.04, 0.03 The decision coefficient is: 0.97 p=1, preprocess=normalize The mean squar error of train and test are: 0.06, 0.00 The decision coefficient is: 1.00
p=2, preprocess=null The mean squar error of train and test are: 0.03, 0.00 The decision coefficient is: 1.00 p=2, preprocess=standardize The mean squar error of train and test are: 0.04, 0.03 The decision coefficient is: 0.97 p=2, preprocess=normalize The mean squar error of train and test are: 0.04, 0.00 The decision coefficient is: 1.00
p=-1, preprocess=null The mean squar error of train and test are: 0.03, 0.00 The decision coefficient is: 1.00 p=-1, preprocess=standardize The mean squar error of train and test are: 0.04, 0.07 The decision coefficient is: 0.93 p=-1, preprocess=normalize The mean squar error of train and test are: 0.03, 0.00 The decision coefficient is: 1.00
def standardize(data):
    """Standardize each column of ``data`` in place to zero mean and unit spread.

    Every column is shifted by its mean and divided by its population
    standard deviation (``np.std`` with the default ``ddof=0``).

    Args:
        data: A row-indexable 2-D collection (e.g. a list of lists) of
            numbers. It is modified in place.

    Returns:
        The same ``data`` object, after modification.
    """
    # Nothing to scale; also avoids indexing data[0] on an empty input.
    if len(data) == 0:
        return data

    for col in range(len(data[0])):
        column = [row[col] for row in data]
        mean = np.mean(column, axis=0)
        std = np.std(column, axis=0)
        # A constant column has zero spread; dividing by it would fill the
        # column with NaN/inf, so such a column is only centred (becomes 0).
        scale = np.where(std == 0, 1.0, std)
        for row in data:
            row[col] = (row[col] - mean) / scale
    return data
1 2 3 4 5 6 7 8 9 10 11
# Min-max normalize the values of each column separately.
def normalize(data):
    """Rescale each column of ``data`` in place to the range [0, 1].

    Every value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        data: A row-indexable 2-D collection (e.g. a list of lists) of
            numbers. It is modified in place.

    Returns:
        The same ``data`` object, after modification.
    """
    # Nothing to scale; also avoids indexing data[0] on an empty input.
    if len(data) == 0:
        return data

    for col in range(len(data[0])):
        column = [row[col] for row in data]
        mn = min(column)
        mx = max(column)
        span = mx - mn
        for row in data:
            # A constant column has no range; map it to 0.0 instead of
            # dividing by zero.
            row[col] = (row[col] - mn) / span if span != 0 else 0.0
    return data
# Evaluate the KNN regressor for every combination of p value and
# preprocessing option.
# NOTE(review): this listing was flattened onto one line; the nesting of the
# two trailing blank-line prints (one per inner iteration, one per p value)
# is reconstructed — confirm against the original script.
for p in p_values:
    for preprocess in preprocess_options:
        # Report which p value and preprocessing option is being run.
        print(f"p={p}, preprocess={preprocess}")

        # The scaling helpers are applied to fresh deep copies, so X_train and
        # X_test themselves are not passed to them.
        X_train_cur, X_test_cur = copy.deepcopy(X_train), copy.deepcopy(X_test)

        # 'null' (or any other unrecognised option) leaves the features as-is.
        # NOTE(review): the test split is rescaled using its own statistics,
        # not the training split's — confirm this is intended.
        if preprocess == 'standardize':
            X_train_cur = standardize(X_train_cur)
            X_test_cur = standardize(X_test_cur)
        elif preprocess == 'normalize':
            X_train_cur = normalize(X_train_cur)
            X_test_cur = normalize(X_test_cur)

        # Create the KNN regressor and fit it on the training split.
        knn_regressor = KNNRegressor(k=k, p=p)
        knn_regressor.fit(X_train_cur, y_train)

        # Predict on both splits.
        y_train_pred = knn_regressor.predict(X_train_cur)
        y_test_pred = knn_regressor.predict(X_test_cur)

        # Mean squared error on each split.
        train_err = metrics.mean_squared_error(y_train, y_train_pred)
        test_err = metrics.mean_squared_error(y_test, y_test_pred)
        print('The mean squar error of train and test are: {:.2f}, {:.2f}'.format(train_err, test_err))

        # Score reported by the regressor on the test split.
        predict_score = knn_regressor.score(X_test_cur, y_test)
        print('The decision coefficient is: {:.2f}'.format(predict_score))
        print()
    print()
测试结果
p=1, preprocess=null
The mean squar error of train and test are: 2651.05, 2925.80
The decision coefficient is: 54.09
p=1, preprocess=standardize
The mean squar error of train and test are: 2639.52, 3210.57
The decision coefficient is: 56.66
p=1, preprocess=normalize
The mean squar error of train and test are: 2659.10, 3438.37
The decision coefficient is: 58.64
p=2, preprocess=null
The mean squar error of train and test are: 2528.59, 3019.08
The decision coefficient is: 54.95
p=2, preprocess=standardize
The mean squar error of train and test are: 2553.65, 3111.98
The decision coefficient is: 55.79
p=2, preprocess=normalize
The mean squar error of train and test are: 2547.91, 3275.45
The decision coefficient is: 57.23
p=-1, preprocess=null
The mean squar error of train and test are: 2591.17, 3134.99
The decision coefficient is: 55.99
p=-1, preprocess=standardize
The mean squar error of train and test are: 2637.77, 2981.47
The decision coefficient is: 54.60
p=-1, preprocess=normalize
The mean squar error of train and test are: 2582.24, 3289.01
The decision coefficient is: 57.35