{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Chapter 2 – End-to-end Machine Learning project**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "*This notebook contains all the sample code and solutions to the exercises in chapter 2.*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n",
" | \n",
" \n",
" | \n",
"
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "median_house_value | \n", "ocean_proximity | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-122.23 | \n", "37.88 | \n", "41.0 | \n", "880.0 | \n", "129.0 | \n", "322.0 | \n", "126.0 | \n", "8.3252 | \n", "452600.0 | \n", "NEAR BAY | \n", "
1 | \n", "-122.22 | \n", "37.86 | \n", "21.0 | \n", "7099.0 | \n", "1106.0 | \n", "2401.0 | \n", "1138.0 | \n", "8.3014 | \n", "358500.0 | \n", "NEAR BAY | \n", "
2 | \n", "-122.24 | \n", "37.85 | \n", "52.0 | \n", "1467.0 | \n", "190.0 | \n", "496.0 | \n", "177.0 | \n", "7.2574 | \n", "352100.0 | \n", "NEAR BAY | \n", "
3 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1274.0 | \n", "235.0 | \n", "558.0 | \n", "219.0 | \n", "5.6431 | \n", "341300.0 | \n", "NEAR BAY | \n", "
4 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1627.0 | \n", "280.0 | \n", "565.0 | \n", "259.0 | \n", "3.8462 | \n", "342200.0 | \n", "NEAR BAY | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "median_house_value | \n", "
---|---|---|---|---|---|---|---|---|---|
count | \n", "20640.000000 | \n", "20640.000000 | \n", "20640.000000 | \n", "20640.000000 | \n", "20433.000000 | \n", "20640.000000 | \n", "20640.000000 | \n", "20640.000000 | \n", "20640.000000 | \n", "
mean | \n", "-119.569704 | \n", "35.631861 | \n", "28.639486 | \n", "2635.763081 | \n", "537.870553 | \n", "1425.476744 | \n", "499.539680 | \n", "3.870671 | \n", "206855.816909 | \n", "
std | \n", "2.003532 | \n", "2.135952 | \n", "12.585558 | \n", "2181.615252 | \n", "421.385070 | \n", "1132.462122 | \n", "382.329753 | \n", "1.899822 | \n", "115395.615874 | \n", "
min | \n", "-124.350000 | \n", "32.540000 | \n", "1.000000 | \n", "2.000000 | \n", "1.000000 | \n", "3.000000 | \n", "1.000000 | \n", "0.499900 | \n", "14999.000000 | \n", "
25% | \n", "-121.800000 | \n", "33.930000 | \n", "18.000000 | \n", "1447.750000 | \n", "296.000000 | \n", "787.000000 | \n", "280.000000 | \n", "2.563400 | \n", "119600.000000 | \n", "
50% | \n", "-118.490000 | \n", "34.260000 | \n", "29.000000 | \n", "2127.000000 | \n", "435.000000 | \n", "1166.000000 | \n", "409.000000 | \n", "3.534800 | \n", "179700.000000 | \n", "
75% | \n", "-118.010000 | \n", "37.710000 | \n", "37.000000 | \n", "3148.000000 | \n", "647.000000 | \n", "1725.000000 | \n", "605.000000 | \n", "4.743250 | \n", "264725.000000 | \n", "
max | \n", "-114.310000 | \n", "41.950000 | \n", "52.000000 | \n", "39320.000000 | \n", "6445.000000 | \n", "35682.000000 | \n", "6082.000000 | \n", "15.000100 | \n", "500001.000000 | \n", "
\n", " | Overall % | \n", "Stratified % | \n", "Random % | \n", "Strat. Error % | \n", "Rand. Error % | \n", "
---|---|---|---|---|---|
Income Category | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
1 | \n", "3.98 | \n", "4.00 | \n", "4.24 | \n", "0.36 | \n", "6.45 | \n", "
2 | \n", "31.88 | \n", "31.88 | \n", "30.74 | \n", "-0.02 | \n", "-3.59 | \n", "
3 | \n", "35.06 | \n", "35.05 | \n", "34.52 | \n", "-0.01 | \n", "-1.53 | \n", "
4 | \n", "17.63 | \n", "17.64 | \n", "18.41 | \n", "0.03 | \n", "4.42 | \n", "
5 | \n", "11.44 | \n", "11.43 | \n", "12.09 | \n", "-0.08 | \n", "5.63 | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "ocean_proximity | \n", "
---|---|---|---|---|---|---|---|---|---|
14452 | \n", "-120.67 | \n", "40.50 | \n", "15.0 | \n", "5343.0 | \n", "NaN | \n", "2503.0 | \n", "902.0 | \n", "3.5962 | \n", "INLAND | \n", "
18217 | \n", "-117.96 | \n", "34.03 | \n", "35.0 | \n", "2093.0 | \n", "NaN | \n", "1755.0 | \n", "403.0 | \n", "3.4115 | \n", "<1H OCEAN | \n", "
11889 | \n", "-118.05 | \n", "34.04 | \n", "33.0 | \n", "1348.0 | \n", "NaN | \n", "1098.0 | \n", "257.0 | \n", "4.2917 | \n", "<1H OCEAN | \n", "
20325 | \n", "-118.88 | \n", "34.17 | \n", "15.0 | \n", "4260.0 | \n", "NaN | \n", "1701.0 | \n", "669.0 | \n", "5.1033 | \n", "<1H OCEAN | \n", "
14360 | \n", "-117.87 | \n", "33.62 | \n", "8.0 | \n", "1266.0 | \n", "NaN | \n", "375.0 | \n", "183.0 | \n", "9.8020 | \n", "<1H OCEAN | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "ocean_proximity | \n", "
---|
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "population | \n", "households | \n", "median_income | \n", "ocean_proximity | \n", "
---|---|---|---|---|---|---|---|---|
14452 | \n", "-120.67 | \n", "40.50 | \n", "15.0 | \n", "5343.0 | \n", "2503.0 | \n", "902.0 | \n", "3.5962 | \n", "INLAND | \n", "
18217 | \n", "-117.96 | \n", "34.03 | \n", "35.0 | \n", "2093.0 | \n", "1755.0 | \n", "403.0 | \n", "3.4115 | \n", "<1H OCEAN | \n", "
11889 | \n", "-118.05 | \n", "34.04 | \n", "33.0 | \n", "1348.0 | \n", "1098.0 | \n", "257.0 | \n", "4.2917 | \n", "<1H OCEAN | \n", "
20325 | \n", "-118.88 | \n", "34.17 | \n", "15.0 | \n", "4260.0 | \n", "1701.0 | \n", "669.0 | \n", "5.1033 | \n", "<1H OCEAN | \n", "
14360 | \n", "-117.87 | \n", "33.62 | \n", "8.0 | \n", "1266.0 | \n", "375.0 | \n", "183.0 | \n", "9.8020 | \n", "<1H OCEAN | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "ocean_proximity | \n", "
---|---|---|---|---|---|---|---|---|---|
14452 | \n", "-120.67 | \n", "40.50 | \n", "15.0 | \n", "5343.0 | \n", "434.0 | \n", "2503.0 | \n", "902.0 | \n", "3.5962 | \n", "INLAND | \n", "
18217 | \n", "-117.96 | \n", "34.03 | \n", "35.0 | \n", "2093.0 | \n", "434.0 | \n", "1755.0 | \n", "403.0 | \n", "3.4115 | \n", "<1H OCEAN | \n", "
11889 | \n", "-118.05 | \n", "34.04 | \n", "33.0 | \n", "1348.0 | \n", "434.0 | \n", "1098.0 | \n", "257.0 | \n", "4.2917 | \n", "<1H OCEAN | \n", "
20325 | \n", "-118.88 | \n", "34.17 | \n", "15.0 | \n", "4260.0 | \n", "434.0 | \n", "1701.0 | \n", "669.0 | \n", "5.1033 | \n", "<1H OCEAN | \n", "
14360 | \n", "-117.87 | \n", "33.62 | \n", "8.0 | \n", "1266.0 | \n", "434.0 | \n", "375.0 | \n", "183.0 | \n", "9.8020 | \n", "<1H OCEAN | \n", "
SimpleImputer(strategy='median')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SimpleImputer(strategy='median')
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "
---|---|---|---|---|---|---|---|---|
14452 | \n", "-120.67 | \n", "40.50 | \n", "15.0 | \n", "5343.0 | \n", "434.0 | \n", "2503.0 | \n", "902.0 | \n", "3.5962 | \n", "
18217 | \n", "-117.96 | \n", "34.03 | \n", "35.0 | \n", "2093.0 | \n", "434.0 | \n", "1755.0 | \n", "403.0 | \n", "3.4115 | \n", "
11889 | \n", "-118.05 | \n", "34.04 | \n", "33.0 | \n", "1348.0 | \n", "434.0 | \n", "1098.0 | \n", "257.0 | \n", "4.2917 | \n", "
20325 | \n", "-118.88 | \n", "34.17 | \n", "15.0 | \n", "4260.0 | \n", "434.0 | \n", "1701.0 | \n", "669.0 | \n", "5.1033 | \n", "
14360 | \n", "-117.87 | \n", "33.62 | \n", "8.0 | \n", "1266.0 | \n", "434.0 | \n", "375.0 | \n", "183.0 | \n", "9.8020 | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "
---|---|---|---|---|---|---|---|---|
14452 | \n", "-120.67 | \n", "40.50 | \n", "15.0 | \n", "5343.0 | \n", "434.0 | \n", "2503.0 | \n", "902.0 | \n", "3.5962 | \n", "
18217 | \n", "-117.96 | \n", "34.03 | \n", "35.0 | \n", "2093.0 | \n", "434.0 | \n", "1755.0 | \n", "403.0 | \n", "3.4115 | \n", "
11889 | \n", "-118.05 | \n", "34.04 | \n", "33.0 | \n", "1348.0 | \n", "434.0 | \n", "1098.0 | \n", "257.0 | \n", "4.2917 | \n", "
20325 | \n", "-118.88 | \n", "34.17 | \n", "15.0 | \n", "4260.0 | \n", "434.0 | \n", "1701.0 | \n", "669.0 | \n", "5.1033 | \n", "
14360 | \n", "-117.87 | \n", "33.62 | \n", "8.0 | \n", "1266.0 | \n", "434.0 | \n", "375.0 | \n", "183.0 | \n", "9.8020 | \n", "
\n", " | ocean_proximity | \n", "
---|---|
13096 | \n", "NEAR BAY | \n", "
14973 | \n", "<1H OCEAN | \n", "
3785 | \n", "INLAND | \n", "
14689 | \n", "INLAND | \n", "
20507 | \n", "NEAR OCEAN | \n", "
1286 | \n", "INLAND | \n", "
18078 | \n", "<1H OCEAN | \n", "
4396 | \n", "NEAR BAY | \n", "
\n", " | ocean_proximity_INLAND | \n", "ocean_proximity_NEAR BAY | \n", "
---|---|---|
0 | \n", "1 | \n", "0 | \n", "
1 | \n", "0 | \n", "1 | \n", "
\n", " | ocean_proximity_<2H OCEAN | \n", "ocean_proximity_ISLAND | \n", "
---|---|---|
0 | \n", "1 | \n", "0 | \n", "
1 | \n", "0 | \n", "1 | \n", "
\n", " | ocean_proximity_<1H OCEAN | \n", "ocean_proximity_INLAND | \n", "ocean_proximity_ISLAND | \n", "ocean_proximity_NEAR BAY | \n", "ocean_proximity_NEAR OCEAN | \n", "
---|---|---|---|---|---|
0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median')),\n", " ('standardscaler', StandardScaler())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median')),\n", " ('standardscaler', StandardScaler())])
SimpleImputer(strategy='median')
StandardScaler()
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "
---|---|---|---|---|---|---|---|---|
13096 | \n", "-1.423037 | \n", "1.013606 | \n", "1.861119 | \n", "0.311912 | \n", "1.368167 | \n", "0.137460 | \n", "1.394812 | \n", "-0.936491 | \n", "
14973 | \n", "0.596394 | \n", "-0.702103 | \n", "0.907630 | \n", "-0.308620 | \n", "-0.435925 | \n", "-0.693771 | \n", "-0.373485 | \n", "1.171942 | \n", "
StandardScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
StandardScaler()
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median'))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median'))])
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SimpleImputer(strategy='median')
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median')),\n", " ('standardscaler', StandardScaler())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median')),\n", " ('standardscaler', StandardScaler())])
SimpleImputer(strategy='median')
StandardScaler()
\n", " | pipeline-1__longitude | \n", "pipeline-1__latitude | \n", "pipeline-1__housing_median_age | \n", "pipeline-1__total_rooms | \n", "pipeline-1__total_bedrooms | \n", "pipeline-1__population | \n", "pipeline-1__households | \n", "pipeline-1__median_income | \n", "pipeline-2__ocean_proximity_<1H OCEAN | \n", "pipeline-2__ocean_proximity_INLAND | \n", "pipeline-2__ocean_proximity_ISLAND | \n", "pipeline-2__ocean_proximity_NEAR BAY | \n", "pipeline-2__ocean_proximity_NEAR OCEAN | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
13096 | \n", "-1.423037 | \n", "1.013606 | \n", "1.861119 | \n", "0.311912 | \n", "1.368167 | \n", "0.137460 | \n", "1.394812 | \n", "-0.936491 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
14973 | \n", "0.596394 | \n", "-0.702103 | \n", "0.907630 | \n", "-0.308620 | \n", "-0.435925 | \n", "-0.693771 | \n", "-0.373485 | \n", "1.171942 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6...\n", " 'median_income']),\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('linearregression', LinearRegression())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6...\n", " 'median_income']),\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('linearregression', LinearRegression())])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
LinearRegression()
Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6...\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('decisiontreeregressor',\n", " DecisionTreeRegressor(random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6...\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('decisiontreeregressor',\n", " DecisionTreeRegressor(random_state=42))])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
DecisionTreeRegressor(random_state=42)
GridSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<f...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('random_forest',\n", " RandomForestRegressor(random_state=42))]),\n", " param_grid=[{'preprocessing__geo__n_clusters': [5, 8, 10],\n", " 'random_forest__max_features': [4, 6, 8]},\n", " {'preprocessing__geo__n_clusters': [10, 15],\n", " 'random_forest__max_features': [6, 8, 10]}],\n", " scoring='neg_root_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<f...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('random_forest',\n", " RandomForestRegressor(random_state=42))]),\n", " param_grid=[{'preprocessing__geo__n_clusters': [5, 8, 10],\n", " 'random_forest__max_features': [4, 6, 8]},\n", " {'preprocessing__geo__n_clusters': [10, 15],\n", " 'random_forest__max_features': [6, 8, 10]}],\n", " scoring='neg_root_mean_squared_error')
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('random_forest', RandomForestRegressor(random_state=42))])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
RandomForestRegressor(random_state=42)
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " ClusterSimilarity(n_clusters=15,\n", " random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bdbef4970>)])),\n", " ('random_forest',\n", " RandomForestRegressor(max_features=6, random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " ClusterSimilarity(n_clusters=15,\n", " random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bdbef4970>)])),\n", " ('random_forest',\n", " RandomForestRegressor(max_features=6, random_state=42))])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo',\n", " ClusterSimilarity(n_clusters=15,\n", " random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bdbef4970>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(n_clusters=15, random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bdbef4970>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
RandomForestRegressor(max_features=6, random_state=42)
\n", " | n_clusters | \n", "max_features | \n", "split0 | \n", "split1 | \n", "split2 | \n", "mean_test_rmse | \n", "
---|---|---|---|---|---|---|
12 | \n", "15 | \n", "6 | \n", "43460 | \n", "43919 | \n", "44748 | \n", "44042 | \n", "
13 | \n", "15 | \n", "8 | \n", "44132 | \n", "44075 | \n", "45010 | \n", "44406 | \n", "
14 | \n", "15 | \n", "10 | \n", "44374 | \n", "44286 | \n", "45316 | \n", "44659 | \n", "
7 | \n", "10 | \n", "6 | \n", "44683 | \n", "44655 | \n", "45657 | \n", "44999 | \n", "
9 | \n", "10 | \n", "6 | \n", "44683 | \n", "44655 | \n", "45657 | \n", "44999 | \n", "
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_...\n", " ('random_forest',\n", " RandomForestRegressor(random_state=42))]),\n", " param_distributions={'preprocessing__geo__n_clusters': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x7f6bd9735c30>,\n", " 'random_forest__max_features': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x7f6bdbd0a2f0>},\n", " random_state=42, scoring='neg_root_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_...\n", " ('random_forest',\n", " RandomForestRegressor(random_state=42))]),\n", " param_distributions={'preprocessing__geo__n_clusters': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x7f6bd9735c30>,\n", " 'random_forest__max_features': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x7f6bdbd0a2f0>},\n", " random_state=42, scoring='neg_root_mean_squared_error')
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('random_forest', RandomForestRegressor(random_state=42))])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
RandomForestRegressor(random_state=42)
\n", " | n_clusters | \n", "max_features | \n", "split0 | \n", "split1 | \n", "split2 | \n", "mean_test_rmse | \n", "
---|---|---|---|---|---|---|
1 | \n", "45 | \n", "9 | \n", "41287 | \n", "42071 | \n", "42627 | \n", "41995 | \n", "
8 | \n", "32 | \n", "7 | \n", "41690 | \n", "42513 | \n", "43224 | \n", "42475 | \n", "
0 | \n", "41 | \n", "16 | \n", "42223 | \n", "42959 | \n", "43321 | \n", "42834 | \n", "
5 | \n", "42 | \n", "4 | \n", "41818 | \n", "43094 | \n", "43817 | \n", "42910 | \n", "
2 | \n", "23 | \n", "8 | \n", "42264 | \n", "42996 | \n", "43830 | \n", "43030 | \n", "
GridSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<f...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())]),\n", " param_grid=[{'svr__C': [10.0, 30.0, 100.0, 300.0, 1000.0, 3000.0,\n", " 10000.0, 30000.0],\n", " 'svr__kernel': ['linear']},\n", " {'svr__C': [1.0, 3.0, 10.0, 30.0, 100.0, 300.0,\n", " 1000.0],\n", " 'svr__gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0],\n", " 'svr__kernel': ['rbf']}],\n", " scoring='neg_root_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<f...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())]),\n", " param_grid=[{'svr__C': [10.0, 30.0, 100.0, 300.0, 1000.0, 3000.0,\n", " 10000.0, 30000.0],\n", " 'svr__kernel': ['linear']},\n", " {'svr__C': [1.0, 3.0, 10.0, 30.0, 100.0, 300.0,\n", " 1000.0],\n", " 'svr__gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0],\n", " 'svr__kernel': ['rbf']}],\n", " scoring='neg_root_mean_squared_error')
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " 'total_rooms', 'population',\n", " 'households',\n", " 'median_income']),\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
SVR()
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())]),\n", " n_iter=50,\n", " param_distributions={'svr__C': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bd730b1c0>,\n", " 'svr__gamma': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bd73278b0>,\n", " 'svr__kernel': ['linear', 'rbf']},\n", " random_state=42, scoring='neg_root_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_...\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())]),\n", " n_iter=50,\n", " param_distributions={'svr__C': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bd730b1c0>,\n", " 'svr__gamma': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bd73278b0>,\n", " 'svr__kernel': ['linear', 'rbf']},\n", " random_state=42, scoring='neg_root_mean_squared_error')
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe...\n", " 'total_rooms', 'population',\n", " 'households',\n", " 'median_income']),\n", " ('geo',\n", " ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR())])
ColumnTransformer(remainder=Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio...\n", " ['total_bedrooms', 'total_rooms', 'population',\n", " 'households', 'median_income']),\n", " ('geo', ClusterSimilarity(random_state=42),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
ClusterSimilarity(random_state=42)
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
['housing_median_age']
SimpleImputer(strategy='median')
StandardScaler()
SVR()
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)),\n", " ('standardscaler',\n", " StandardSc...\n", " param_distributions={'preprocessing__geo__estimator__n_neighbors': range(1, 30),\n", " 'preprocessing__geo__estimator__weights': ['distance',\n", " 'uniform'],\n", " 'svr__C': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bc984c8b0>,\n", " 'svr__gamma': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bc984ca90>},\n", " random_state=42, scoring='neg_root_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=3,\n", " estimator=Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)),\n", " ('standardscaler',\n", " StandardSc...\n", " param_distributions={'preprocessing__geo__estimator__n_neighbors': range(1, 30),\n", " 'preprocessing__geo__estimator__weights': ['distance',\n", " 'uniform'],\n", " 'svr__C': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bc984c8b0>,\n", " 'svr__gamma': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f6bc984ca90>},\n", " random_state=42, scoring='neg_root_mean_squared_error')
Pipeline(steps=[('preprocessing',\n", " ColumnTransformer(transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['total_bedrooms',\n", " 'total...\n", " FeatureFromRegressor(estimator=KNeighborsRegressor(n_neighbors=3,\n", " weights='distance')),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])),\n", " ('svr', SVR(C=157055.10989448498, gamma=0.26497040005002437))])
ColumnTransformer(transformers=[('bedrooms',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('functiontransformer',\n", " FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['total_bedrooms', 'total_rooms']),\n", " ('rooms_per_house',\n", " Pipe...\n", " 'households', 'median_income']),\n", " ('geo',\n", " FeatureFromRegressor(estimator=KNeighborsRegressor(n_neighbors=3,\n", " weights='distance')),\n", " ['latitude', 'longitude']),\n", " ('cat',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'))]),\n", " <sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>)])
['total_bedrooms', 'total_rooms']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_rooms', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['population', 'households']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out=<function ratio_name at 0x7f6bdbe6cee0>,\n", " func=<function column_ratio at 0x7f6bdbe6cf70>)
StandardScaler()
['total_bedrooms', 'total_rooms', 'population', 'households', 'median_income']
SimpleImputer(strategy='median')
FunctionTransformer(feature_names_out='one-to-one', func=<ufunc 'log'>)
StandardScaler()
['latitude', 'longitude']
KNeighborsRegressor(n_neighbors=3, weights='distance')
KNeighborsRegressor(n_neighbors=3, weights='distance')
<sklearn.compose._column_transformer.make_column_selector object at 0x7f6bd716ed70>
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore')
SVR(C=157055.10989448498, gamma=0.26497040005002437)