import pandas as pd
from sklearn.preprocessing import StandardScaler
# Small in-memory dataset: 12 rows, five numeric columns (one list per column).
datos = dict(
    superficie=[75, 95, 60, 120, 85, 50, 110, 70, 140, 65, 90, 100],
    habitaciones=[3, 3, 2, 4, 3, 1, 4, 2, 5, 2, 3, 4],
    banios=[1, 2, 1, 2, 2, 1, 3, 1, 3, 1, 2, 2],
    antiguedad=[20, 10, 35, 5, 15, 40, 8, 25, 2, 30, 12, 7],
    dist_centro=[5.2, 3.8, 8.1, 2.1, 4.5, 9.0, 3.2, 6.7, 1.5, 7.4, 4.0, 2.8],
)
df = pd.DataFrame(data=datos)

# Show the raw table and its per-column summary statistics before scaling.
print('─── ANTES ───')
print(df)
medias_antes = df.mean().round(2).to_dict()
# pandas' .std() defaults to the sample standard deviation (ddof=1).
desv_antes = df.std().round(2).to_dict()
print(f'Medias: {medias_antes}')
print(f'Desv.tip.: {desv_antes}')
# ── Apply StandardScaler ─────────────────────────────────────────
# fit_transform learns each column's mean and standard deviation from df and
# returns the centred and scaled values.
scaler = StandardScaler()
X_std = scaler.fit_transform(df)
# Rebuild a DataFrame, keeping the original column labels and row index.
df_std = pd.DataFrame(X_std, columns=df.columns, index=df.index)
print('─── DESPUÉS (Standard Scaler) ───')
print(df_std.round(3))
print(f'Medias tras escalar: {df_std.mean().round(6).to_dict()}')
# StandardScaler divides by the population standard deviation (ddof=0), whereas
# pandas' .std() defaults to the sample estimate (ddof=1). Using ddof=0 here
# makes the check match what the scaler actually did; with the pandas default
# every column would report sqrt(n / (n - 1)) instead of 1.0.
print(f'Desv.tip. tras escalar: {df_std.std(ddof=0).round(3).to_dict()}')
# Means ≈ 0.0 | Standard deviations ≈ 1.0
# ── Inspect the learned parameters ───────────────────────────────
print('Medias aprendidas (mean_):', scaler.mean_.round(3))
print('Desv. típ. aprendidas (scale_):', scaler.scale_.round(3))
# ── CORRECT usage with a train/test split ────────────────────────
from sklearn.model_selection import train_test_split

X_train, X_test = train_test_split(df, random_state=42, test_size=0.2)
scaler2 = StandardScaler()
# Learn the mean/std from the TRAIN rows only, so that no statistics from the
# test rows influence the scaling parameters.
scaler2.fit(X_train)
X_train_std = scaler2.transform(X_train)
# Apply those same TRAIN-derived mean/std values to the TEST rows.
X_test_std = scaler2.transform(X_test)