ML cheatsheet
basic
importing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import accuracy_score, confusion_matrix
from google.colab import drive
drive.mount('/content/drive')
#uploaded = files.upload()
filepath = '/content/drive/My Drive/Tai-Yu Chen'
os.chdir(filepath)
prelim info
print(data.shape)
data.describe()
visualizing data/plotting
histogram
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.hist(medical_data['bmi'], edgecolor = 'black')
plt.xlabel('bmi')
plt.show()
box plot (1 attribute)
plt.figure(figsize=(15,4))
sns.boxplot(x=medical_data['bmi'])
scatter plot (2 attributes)
plt.figure(figsize=(12,10))
sns.scatterplot(medical_data['bmi'], medical_data['charges'], hue=medical_data['sex'])
plt.show()
pie charts
categories = data.groupby(by='category',as_index=False).agg({'unique_col':'count'})
plt.pie(x=categories['unique_col'], labels=categories['category']);
creating models