展示網站欣賞百度怎么注冊公司網站
對之前的內容做一個梳理,圍繞機器學習全流程展開,從數據預處理 → 特征工程 → 模型訓練 → 評估優化,形成完整閉環。
將之前做過的關鍵步驟記錄下來:
# ==== Programming basics ====
# DAY1: Variables and formatted strings
name = "Alice"
print(f"Hello, {name}!")

# DAY3: Lists, loops and conditionals
nums = [1, 2, 3]
for num in nums:
    # Only values strictly greater than 1 are printed (2 and 3 here).
    if num > 1:
        print(num)

# ==== Data processing ====
# DAY4: Missing-value handling (pandas)
import pandas as pd

df = pd.DataFrame({'A': [1, None, 3]})
# Replace each NaN with the mean of its own column, modifying df in place.
column_means = df.mean()
df.fillna(value=column_means, inplace=True)

# DAY5: One-hot encoding
from sklearn.preprocessing import OneHotEncoder

# OneHotEncoder expects a 2-D input holding the column(s) to encode. `df`
# only has the numeric column 'A', so df[['category']] would raise KeyError;
# a small demo frame with an actual 'category' column is used instead, which
# also leaves `df` purely numeric for later use.
category_df = pd.DataFrame({'category': ['A', 'B', 'A']})
encoder = OneHotEncoder().fit(category_df[['category']])

# DAY8: Label encoding
from sklearn.preprocessing import LabelEncoder

# Fit on the labels and encode them in one step; the encoded array is not
# stored anywhere.
LabelEncoder().fit_transform(['A', 'B', 'A'])

# ==== Visualization ====
# DAY9: Heatmap (Seaborn)
import seaborn as sns

# Plot the pairwise column correlations of df, annotating the cells.
correlation_matrix = df.corr()
sns.heatmap(correlation_matrix, annot=True)

# ==== Machine learning ====
# DAY10: Modeling and evaluation (scikit-learn)
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): X_train, y_train, X_test and y_test are not defined in this
# snippet; presumably they come from an earlier train/test split — confirm.
model = RandomForestClassifier()
model.fit(X_train, y_train)
test_score = model.score(X_test, y_test)
print(test_score)

# DAY11: Hyperparameter tuning (GridSearchCV)
from sklearn.model_selection import GridSearchCV

# Candidate values to search over for the estimator's n_estimators setting.
param_grid = {'n_estimators': [50, 100]}
# NOTE(review): X and y are not defined in this snippet — confirm their source.
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X, y)

# DAY14: SHAP analysis
import shap

# NOTE(review): X is not defined in this snippet — confirm its source.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# ==== Feature engineering ====
# DAY19: Feature selection (Lasso)
from sklearn.linear_model import Lasso

# NOTE(review): X and y are not defined in this snippet — confirm their source.
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)
# Bare expression: the fitted coefficients are looked up but not stored.
lasso.coef_

# DAY20: SVD decomposition
from sklearn.decomposition import TruncatedSVD

# NOTE(review): X is not defined in this snippet — confirm its source.
svd = TruncatedSVD(n_components=2)
svd.fit_transform(X)  # the transformed output is not stored

# ==== Advanced syntax ====
# DAY25: Exception handling
try:
    # The division raises before the assignment happens, so x is never bound.
    x = 1 / 0
except ZeroDivisionError:
    print("Error")

# DAY27: Decorators
import functools


def my_decorator(func):
    """Wrap *func* so that "Before" is printed ahead of every call.

    Args:
        func: The callable to decorate.

    Returns:
        A wrapper that prints "Before", then calls *func* with the arguments
        it received and returns func's result.
    """

    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("Before")
        # Forward arguments and the return value so decorating a function
        # does not change how it is called or what it yields.
        return func(*args, **kwargs)

    return wrapper


# DAY28: Class definition
class MyClass:
    """Minimal class example that holds a single value ``x``."""

    def __init__(self, x):
        """Store *x* on the instance."""
        self.x = x

    def print_x(self):
        """Print the stored value of ``x``."""
        print(self.x)
@浙大疏錦行