datasets

Published by onesixx on

.

import seaborn as sns
# Load the 'titanic' example dataset by name.
titanic = sns.load_dataset('titanic')

# List the names of the example datasets that load_dataset accepts
# (the notebook output is shown right below).
sns.get_dataset_names()
['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

# Preview every example dataset: print its name, then its first rows.
for i in sns.get_dataset_names():
    print(i)  
    df  = sns.load_dataset(i)
    print(df.head())

import seaborn as sns
import pandas as pd

# Fetch the Dow Jones example dataset.
df = sns.load_dataset("dowjones")
# Parse the 'Date' column as datetimes and render it straight back as
# 'YYYY-MM-DD' strings in a single chained expression.
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y-%m-%d')

plotly.express

import plotly.express as px
# Iris sample dataset from plotly.express's built-in `data` module.
iris = px.data.iris()
# Palmer penguins dataset from the standalone `palmerpenguins` package.
from palmerpenguins import load_penguins
penguins = load_penguins()
# pip install scikit-learn

import pandas as pd

### 1. scikit-learn datasets
import sklearn.datasets as sk_datasets

# List the loader functions exposed by sklearn.datasets
# (every public name containing 'load').
[name for name in dir(sk_datasets) if 'load' in name]

# load_iris() returns a single Bunch object, not a tuple (a tuple is only
# returned with return_X_y=True). Indexing it with [0] raises KeyError, so
# use the Bunch directly: DESCR, data, feature_names and target live on it.
iris = sk_datasets.load_iris()
print(iris.DESCR)
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Integer class codes; the matching labels are in iris.target_names.
df['Species'] = iris.target

### 2. statsmodels datasets
import statsmodels.api as sm

# Public (non-underscore) names exposed by statsmodels' datasets module.
[name for name in dir(sm.datasets) if not name.startswith('_')]

# Fetch the 'iris' dataset via get_rdataset and keep only its `.data`
# attribute as the working DataFrame.
df = sm.datasets.get_rdataset('iris').data

### 3. vega_datasets
import vega_datasets as vega
# Inspect what the package's `data` object exposes.
dir(vega.data)

# Load the iris dataset through the vega_datasets loader.
df = vega.data.iris()

# from tabulate import tabulate
# print(tabulate(df.sample(10), headers='keys'))
Categories: Python Basic

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x