Selecting numerical columns

Imports

import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

Create data

# Toy dataset: one text column ('label') and one integer column ('score').
data = {
    'label': ['dog', 'cat', 'catdog', 'dog', 'catdog'],
    'score': [1, 2, 3, 4, 5],
}
# ``columns`` pins the column order of the resulting frame explicitly.
df = pd.DataFrame(data, columns=["label", "score"])
df

    label  score
0     dog      1
1     cat      2
2  catdog      3
3     dog      4
4  catdog      5

Define numerical columns

# Names of every column in ``df`` whose dtype is numeric.
numerical = list(
    df.select_dtypes(include='number').columns
)
numerical
['score']

Create custom transformer (fit and transform methods)

class ColumnSelector(BaseEstimator, TransformerMixin):
    """Stateless transformer that keeps only a chosen set of columns.

    Parameters
    ----------
    columns
        Column label(s) used to index the input in ``transform``
        (``X[columns]``). In this file it is a list of column names.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from ``X`` or ``y``."""
        return self

    def transform(self, X):
        """Return ``X`` indexed by ``self.columns``.

        ``X`` must support ``X[self.columns]``; in this file it is a
        pandas DataFrame.
        """
        selected = X[self.columns]
        return selected

Create numerical pipeline

# Single-step pipeline: 'num_selector' keeps only the columns listed in ``numerical``.
num_pipeline = Pipeline([('num_selector', ColumnSelector(numerical))])

Fit pipeline

# ColumnSelector.fit learns nothing (it just returns self); the value displayed
# below is the pipeline's own repr.
num_pipeline.fit(df)
Pipeline(memory=None,
         steps=[('num_selector', ColumnSelector(columns=['score']))],
         verbose=False)

Transform pipeline

# Runs ColumnSelector.transform, i.e. df[numerical]; only the 'score' column remains.
num_pipeline.transform(df)

   score
0      1
1      2
2      3
3      4
4      5

From