Quickstart
Using fast-select is straightforward for anyone familiar with scikit-learn: a selector can be used on its own via fit_transform, or placed as a step inside a Pipeline.
from fast_select import MultiSURF
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# 1. Generate a synthetic dataset: 500 samples with 1000 features, of which
#    20 are informative and 100 are redundant (fixed seed for repeatability).
X, y = make_classification(
    n_samples=500,
    n_features=1000,
    n_informative=20,
    n_redundant=100,
    random_state=42,
)

# 2. Use the MultiSURF estimator to select the top 15 features
selector = MultiSURF(n_features_to_select=15)
X_selected = selector.fit_transform(X, y)

# Compare the column counts before and after selection, then show which
# features the selector kept.
print(f"Original feature count: {X.shape[1]}")
print(f"Selected feature count: {X_selected.shape[1]}")
print(f"Top 15 feature indices: {selector.top_features_}")

# 3. Integrate into a scikit-learn Pipeline: scale the inputs, keep 10
#    features using MultiSURF with backend="cpu", then classify.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("feature_selector", MultiSURF(n_features_to_select=10, backend="cpu")),
        ("classifier", LogisticRegression()),
    ]
)

# Fit the pipeline (left commented out in this example)
# pipeline.fit(X, y)