import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# 1. DATA: study hours and grades for 20 students.
# The hours are listed flat and then reshaped into a single column, which
# yields the 2-D (20, 1) feature array used by the rest of the script.
horas = np.array([
    1.0, 1.47368421, 1.94736842, 2.42105263, 2.89473684,
    3.36842105, 3.84210526, 4.31578947, 4.78947368, 5.26315789,
    5.73684211, 6.21052632, 6.68421053, 7.15789474, 7.63157895,
    8.10526316, 8.57894737, 9.05263158, 9.52631579, 10.0,
]).reshape(-1, 1)
# Grades: a 1-D array with one entry per row of `horas`.
notas = np.array([
    2.84835708, 2.95718364, 3.77647585, 4.6404623, 4.18818647,
    4.61451047, 5.94750114, 5.96792789, 5.77578912, 6.70812212,
    6.63144905, 7.05660881, 7.83677061, 7.18546514, 7.70596214,
    8.71359308, 8.91463707, 10.00449209, 9.81967217, 9.99384815,
])
# 2. TRAIN / TEST SPLIT (70% training, 30% test)
# random_state is pinned so the split is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    horas,
    notas,
    test_size=0.3,
    random_state=42,
)
# Report how many samples ended up in each subset.
print(
    f"Datos de entrenamiento: {len(X_train)} muestras",
    f"Datos de test: {len(X_test)} muestras",
    sep="\n",
)
# 3. TRAINING: the model is fitted on the training subset only.
# fit() hands back the estimator itself, so construction and fitting chain.
modelo = LinearRegression().fit(X_train, y_train)
# Single feature, so the first coefficient is the slope of the fitted line.
pendiente, intercepto = modelo.coef_[0], modelo.intercept_
print("\nEcuacion aprendida:")
print(f" nota = {pendiente:.3f} * horas + {intercepto:.3f}")
# 4. PREDICTION on the test subset, which was not passed to fit().
y_pred = modelo.predict(X_test)
# 5. EVALUATION with regression metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error
r2 = r2_score(y_test, y_pred)
# One print call, one line of output per argument.
print(
    "\nResultados en el conjunto de TEST:",
    f" MAE = {mae:.3f} (error medio absoluto)",
    f" RMSE = {rmse:.3f} (raiz error cuadratico)",
    f" R2 = {r2:.3f} (coef. determinacion)",
    sep="\n",
)
# 6. VISUALIZATION: training points vs test points, side by side.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# The fitted line is evaluated over all hours once and drawn on both panels.
recta = modelo.predict(horas)
# One entry per panel: axes, points to scatter, colour, legend label, title.
paneles = [
    (axes[0], X_train, y_train, "steelblue", "Train", "Fase de Entrenamiento"),
    (axes[1], X_test, y_test, "orange", "Test", f"Fase de Test (R2={r2:.2f})"),
]
for eje, puntos_x, puntos_y, color, etiqueta, titulo in paneles:
    eje.scatter(puntos_x, puntos_y, color=color, label=etiqueta)
    eje.plot(horas, recta, "r-", label="Modelo")
    eje.set_title(titulo)
    eje.set_xlabel("Horas")
    eje.set_ylabel("Nota")
    eje.legend()
plt.tight_layout()
plt.show()