"""Train a linear-regression house-price model and save it to disk.

The script loads the California housing dataset, derives three proxy
features from its columns, fits a ``LinearRegression`` on a train split,
reports the R^2 score on the held-out split, and writes the fitted model
to ``price_predictor.pkl`` with joblib.
"""

import joblib
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Multipliers used to turn the dataset's per-household averages into the
# proxy features the model is trained on.
SQFT_PER_ROOM = 350
BATHROOMS_PER_ROOM = 0.2
MIN_BATHROOMS = 1

FEATURE_COLUMNS = ['square_feet', 'bedrooms', 'bathrooms']
MODEL_PATH = 'price_predictor.pkl'

# Load dataset
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target  # in 100k USD

# Engineer features
df['square_feet'] = df['AveRooms'] * SQFT_PER_ROOM
df['bedrooms'] = df['AveBedrms']
df['bathrooms'] = df['AveRooms'] * BATHROOMS_PER_ROOM

# Clean bathrooms: enforce a floor of MIN_BATHROOMS.
df['bathrooms'] = df['bathrooms'].clip(lower=MIN_BATHROOMS)

# NOTE(review): 'square_feet' and 'bathrooms' are both scalar multiples of
# 'AveRooms', so they are perfectly collinear on every row where the clip
# above does not apply (AveRooms >= 5). The individual coefficients are
# therefore not meaningful on their own; consider an independent feature.
X = df[FEATURE_COLUMNS]
y = df['target']

# Train/test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out split, which was previously created but never
# used; LinearRegression.score returns the coefficient of determination.
test_r2 = model.score(X_test, y_test)
print(f"Held-out R^2: {test_r2:.3f}")

# Save model
joblib.dump(model, MODEL_PATH)