# Ejemplo de regresión lineal

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. DATA: study hours and grades for 20 students

# Study hours, written as a flat list and reshaped into a single feature
# column of shape (20, 1).
horas = np.array([
    1.0, 1.47368421, 1.94736842, 2.42105263, 2.89473684,
    3.36842105, 3.84210526, 4.31578947, 4.78947368, 5.26315789,
    5.73684211, 6.21052632, 6.68421053, 7.15789474, 7.63157895,
    8.10526316, 8.57894737, 9.05263158, 9.52631579, 10.0,
]).reshape(-1, 1)

# Grades as a 1-D array of 20 values, in the same order as the rows of `horas`.
notas = np.array([
    2.84835708, 2.95718364, 3.77647585, 4.6404623, 4.18818647,
    4.61451047, 5.94750114, 5.96792789, 5.77578912, 6.70812212,
    6.63144905, 7.05660881, 7.83677061, 7.18546514, 7.70596214,
    8.71359308, 8.91463707, 10.00449209, 9.81967217, 9.99384815,
])

# 2. TRAIN / TEST SPLIT (70% training, 30% test)
# The fixed random_state makes the shuffle reproducible between runs.
particion = train_test_split(horas, notas, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = particion

print(f"Datos de entrenamiento: {len(X_train)} muestras")
print(f"Datos de test:          {len(X_test)} muestras")

# 3. TRAINING: the model learns from the training split
# fit() returns the estimator itself, so creation and fitting are chained.
modelo = LinearRegression().fit(X_train, y_train)

# With a single feature, coef_[0] is the slope and intercept_ the offset.
print("\nEcuacion aprendida:")
print(f"  nota = {modelo.coef_[0]:.3f} * horas + {modelo.intercept_:.3f}")

# 4. PREDICTION on the test split (data not used for fitting)
y_pred = modelo.predict(X_test)

# 5. EVALUATION with metrics computed on the test split
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root brings the error back to grade units
r2 = r2_score(y_test, y_pred)

print("\nResultados en el conjunto de TEST:")
print(f"  MAE  = {mae:.3f}   (error medio absoluto)")
print(f"  RMSE = {rmse:.3f}  (raiz error cuadratico)")
print(f"  R2   = {r2:.3f}   (coef. determinacion)")

# 6. VISUALIZATION: training split vs. test split, each against the fitted line
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# The fitted line is identical in both panels, so it is predicted once.
recta = modelo.predict(horas)

# One row per panel: axis, points to scatter, colour, legend label and title.
paneles = (
    (axes[0], X_train, y_train, "steelblue", "Train", "Fase de Entrenamiento"),
    (axes[1], X_test, y_test, "orange", "Test", f"Fase de Test  (R2={r2:.2f})"),
)
for eje, xs, ys, color, etiqueta, titulo in paneles:
    eje.scatter(xs, ys, color=color, label=etiqueta)
    eje.plot(horas, recta, "r-", label="Modelo")
    eje.set_title(titulo)
    eje.set_xlabel("Horas")
    eje.set_ylabel("Nota")
    eje.legend()

plt.tight_layout()
plt.show()

# Publicado por Juan Pablo Fuentes, formador de programación y bases de datos.