import pandas as pd
from pyspark import SparkContext
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
本次分享一个Python工具pyforest:只需要import pyforest一行代码,即可导入所有依赖的Python包(前提是机器上已安装过),
支持Python 3.6及以上版本,因为pyforest的开发者是Python中流行的字符串格式化机制f-strings的忠实粉丝。
pip install --upgrade pyforest -i https://pypi.tuna.tsinghua.edu.cn/simple
以使用seaborn可视化为例,
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
转载请标明来源!转载请标明来源!转载请标明来源!
@Time : 2023年10月
@Author : 公众号:pythonic生物人
@Contact : 公众号:pythonic生物人
@Desc : Demo for pyforest
'''
# Step 1: import dependencies.
# This is the only import statement in the demo snippet; the names `sns`,
# `mpl` and `pd` used below are not imported by the snippet itself.
# NOTE(review): presumably pyforest makes those names available lazily (e.g.
# in a Jupyter session) — confirm; a plain script may need
# `from pyforest import *` instead.
import pyforest
# Step 2: set the plotting theme and the text font.
sns.set_theme()
# NOTE(review): 'Songti SC' is presumably chosen so the Chinese labels below
# render correctly — confirm the font is installed on the target machine.
mpl.rcParams['font.family'] = 'Songti SC'
# Step 3: load the data from a CSV file relative to the working directory.
tips = pd.read_csv('./sns_data/tips.csv')
# Step 4: draw the plot with seaborn.
# The x/y/col/hue/style/size arguments are column names looked up in `tips`;
# `hue` and `style` are given the same column.
sns.relplot(data=tips,
x="消费金额 ($)",
y="小费金额 ($)",
col="就餐时间",
hue="是否吸烟",
style="是否吸烟",
size="一起就餐人数 (个)")
可以看到,仅import pyforest一行代码就一次性导入了seaborn、pandas、matplotlib包,省去了重复书写import语句,即下面这些代码,
import seaborn as sns
import pandas as pd
import matplotlib as mpl
pyforest覆盖99%以上主流的Python数据科学库(Data Science libraries),例如import pandas as pd、import numpy as np、import matplotlib.pyplot as plt、from sklearn.linear_model import LinearRegression等等。
注意:pyforest中导入的包遵循Python社区默认的简称,如pandas→pd、seaborn→sns、matplotlib.pyplot→plt等等。
具体支持哪些包,可通过dir(pyforest)或者 https://github.com/8080labs/pyforest/blob/master/src/pyforest/_imports.py 查看,详细如下,
### Data Wrangling
# Each entry binds a name to LazyImport(<import statement as a string>); the
# bound name is the same name that import statement would introduce.
# NOTE(review): LazyImport is not defined in this excerpt — presumably it
# defers the real import until the name is first used; confirm in pyforest.
pd = LazyImport("import pandas as pd")
np = LazyImport("import numpy as np")
dd = LazyImport("from dask import dataframe as dd")
SparkContext = LazyImport("from pyspark import SparkContext")
# Excel readers: openpyxl and xlrd each contribute one function.
load_workbook = LazyImport("from openpyxl import load_workbook")
open_workbook = LazyImport("from xlrd import open_workbook")
wr = LazyImport("import awswrangler as wr")
### Data Visualization and Plotting
# Names registered for plotting libraries; each is bound to a LazyImport
# built from the import statement it stands for.
# matplotlib is registered twice under different names: the top-level package
# as `mpl` and its pyplot submodule as `plt`.
mpl = LazyImport("import matplotlib as mpl")
plt = LazyImport("import matplotlib.pyplot as plt")
sns = LazyImport("import seaborn as sns")
# plotly: the top-level package plus two of its submodules.
py = LazyImport("import plotly as py")
go = LazyImport("import plotly.graph_objs as go")
px = LazyImport("import plotly.express as px")
dash = LazyImport("import dash")
bokeh = LazyImport("import bokeh")
alt = LazyImport("import altair as alt")
pydot = LazyImport("import pydot")
### Image processing
# Names registered for image libraries; `Image` is taken from the PIL package.
cv2 = LazyImport("import cv2")
skimage = LazyImport("import skimage")
Image = LazyImport("from PIL import Image")
imutils = LazyImport("import imutils")
# statistics
# Two similarly named entries: `statistics` is the module of that name, while
# `stats` is the scipy submodule.
statistics = LazyImport("import statistics")
stats = LazyImport("from scipy import stats")
sm = LazyImport("import statsmodels.api as sm")
### Time-Series Forecasting
# Both the `fbprophet` package and its `Prophet` class are registered.
# NOTE(review): newer releases of this library are reportedly published as
# `prophet` rather than `fbprophet` — confirm against the installed version.
fbprophet = LazyImport("import fbprophet")
Prophet = LazyImport("from fbprophet import Prophet")
# NOTE(review): recent statsmodels versions reportedly expose ARIMA from
# `statsmodels.tsa.arima.model`; verify that this import path still resolves.
ARIMA = LazyImport("from statsmodels.tsa.arima_model import ARIMA")
### Machine Learning
# Names registered for scikit-learn (plus scipy.signal, xgboost and lightgbm).
# Each name is bound to a LazyImport built from the import statement shown in
# its string argument; the sub-group comments below follow the module paths.
sklearn = LazyImport("import sklearn")
# sklearn.linear_model
LinearRegression = LazyImport("from sklearn.linear_model import LinearRegression")
LogisticRegression = LazyImport("from sklearn.linear_model import LogisticRegression")
Lasso = LazyImport("from sklearn.linear_model import Lasso")
LassoCV = LazyImport("from sklearn.linear_model import LassoCV")
Ridge = LazyImport("from sklearn.linear_model import Ridge")
RidgeCV = LazyImport("from sklearn.linear_model import RidgeCV")
ElasticNet = LazyImport("from sklearn.linear_model import ElasticNet")
ElasticNetCV = LazyImport("from sklearn.linear_model import ElasticNetCV")
# sklearn.preprocessing
PolynomialFeatures = LazyImport("from sklearn.preprocessing import PolynomialFeatures")
StandardScaler = LazyImport("from sklearn.preprocessing import StandardScaler")
MinMaxScaler = LazyImport("from sklearn.preprocessing import MinMaxScaler")
RobustScaler = LazyImport("from sklearn.preprocessing import RobustScaler")
OneHotEncoder = LazyImport("from sklearn.preprocessing import OneHotEncoder")
LabelEncoder = LazyImport("from sklearn.preprocessing import LabelEncoder")
# sklearn.manifold / sklearn.decomposition / sklearn.impute
TSNE = LazyImport("from sklearn.manifold import TSNE")
PCA = LazyImport("from sklearn.decomposition import PCA")
SimpleImputer = LazyImport("from sklearn.impute import SimpleImputer")
# sklearn.model_selection
train_test_split = LazyImport("from sklearn.model_selection import train_test_split")
cross_val_score = LazyImport("from sklearn.model_selection import cross_val_score")
GridSearchCV = LazyImport("from sklearn.model_selection import GridSearchCV")
RandomizedSearchCV = LazyImport("from sklearn.model_selection import RandomizedSearchCV")
KFold = LazyImport("from sklearn.model_selection import KFold")
StratifiedKFold = LazyImport("from sklearn.model_selection import StratifiedKFold")
svm = LazyImport("from sklearn import svm")
# sklearn.ensemble
GradientBoostingClassifier = LazyImport(
"from sklearn.ensemble import GradientBoostingClassifier"
)
GradientBoostingRegressor = LazyImport(
"from sklearn.ensemble import GradientBoostingRegressor"
)
RandomForestClassifier = LazyImport(
"from sklearn.ensemble import RandomForestClassifier"
)
RandomForestRegressor = LazyImport("from sklearn.ensemble import RandomForestRegressor")
# sklearn.feature_extraction.text
TfidfVectorizer = LazyImport(
"from sklearn.feature_extraction.text import TfidfVectorizer"
)
CountVectorizer = LazyImport(
"from sklearn.feature_extraction.text import CountVectorizer"
)
metrics = LazyImport("from sklearn import metrics")
# Not scikit-learn: `sg` is scipy's signal submodule.
sg = LazyImport("from scipy import signal as sg")
# Clustering
KMeans = LazyImport ("from sklearn.cluster import KMeans")
# Gradient Boosting Decision Tree
xgb = LazyImport("import xgboost as xgb")
lgb = LazyImport("import lightgbm as lgb")
# TODO: add all the other most important sklearn objects
# TODO: add separate sections within machine learning viz. Classification, Regression, Error Functions, Clustering
# Deep Learning
# Names registered for deep-learning frameworks. Only tensorflow is given a
# short alias (`tf`); the other three are bound under their package names.
tf = LazyImport("import tensorflow as tf")
keras = LazyImport("import keras")
torch = LazyImport("import torch")
fastai = LazyImport("import fastai")
# NLP
# Names registered for text-processing libraries.
nltk = LazyImport("import nltk")
gensim = LazyImport("import gensim")
spacy = LazyImport("import spacy")
# `re` is bound here and again in the Helper section below with the same
# argument; the later assignment rebinds the name.
re = LazyImport("import re")
textblob = LazyImport("import textblob")
### Helper
# General-purpose utility modules.
sys = LazyImport("import sys")
os = LazyImport("import os")
# Second binding of `re` (see the NLP section above).
re = LazyImport("import re")
glob = LazyImport("import glob")
Path = LazyImport("from pathlib import Path")
pickle = LazyImport("import pickle")
dt = LazyImport("import datetime as dt")
# Registers the `tqdm` module itself, not the `tqdm.tqdm` callable.
tqdm = LazyImport("import tqdm")
pyforest中导入的包遵循Python社区默认的简称,如pandas→pd、seaborn→sns、matplotlib.pyplot→plt等等。
如果想自定义包的导入简称,在~/.pyforest/user_imports.py中添加自己的个性化设置即可。
例如,默认是import pandas as pd,如果想设置为import pandas as pd_test,
在~/.pyforest/user_imports.py中添加import pandas as pd_test并保存即可。
同样,当pyforest未包含自己需要的包时,也可以用上述方法添加。
进一步学习:https://github.com/8080labs/pyforest
往期精彩回顾
交流群
欢迎加入机器学习爱好者微信群,和同行一起交流。目前有机器学习交流群、博士群、博士申报交流、CV、NLP等微信群,请扫描下面的微信号加群,备注:“昵称-学校/公司-研究方向”,例如:“张小明-浙大-CV”。请按照格式备注,否则不予通过。添加成功后会根据研究方向邀请进入相关微信群。请勿在群内发送广告,否则会请出群,谢谢理解~(也可以加入机器学习交流QQ群772479961)