Data Cleaning
Datetime
from datetime import datetime, date
date = "2014-03-02"
datetime.strptime(date, '%Y-%m-%d')
>> 2014-03-02 00:00:00
datetime.strptime(date, '%Y-%m-%d').date()
>> 2014-03-02
date = "Jun 1 2005 12:00PM"
datetime.strptime(date, '%b %d %Y %I:%M%p').date
>> 2005-06-01 12:00:00
date = "June 1 2005 12:00PM"
datetime.strptime(date, '%B %d %Y %I:%M%p').date
>> 2005-06-01 12:00:00
date = "Jun 1 2005 1:00"
datetime.strptime(date, '%b %d %Y %I:%M').date
>> 2005-06-01 01:00:00
%b %d %Y %I:%M%p'
Clean Column Names
df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
Slugify
from slugify import slugify, Slugify, UniqueSlugify
custom_slugify = Slugify(to_lower=True)
custom_slugify.separator = '_'
new = [custom_slugify(col) for col in df.columns]
df.columns = new
Last updated