aboutsummaryrefslogtreecommitdiff
path: root/sample.py
blob: a2435d8126df3e7d76b9e00bbd1ae5c2e1e3eed9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Import packages
import pandas as pd

# Define the sample size (the number of rows to draw from the data)
sample_size = 25

# Import the data to a pandas DataFrame
df = pd.read_csv("FILENAME_GOES_HERE.csv")

# ALTERNATIVE: If you use Excel, use this instead. Supports xls, xlsx, xlsm,
# xlsb, odf, ods and odt file extensions.
# df = pd.read_excel("FILENAME_GOES_HERE.xlsx")

# Print totals prior to sampling. print() already separates its arguments
# with a single space, so the label carries no trailing space of its own.
print("Dataframe size (rows, columns):", df.shape)

# Sample: draw `sample_size` rows at random, without replacement. pandas
# raises a ValueError if `sample_size` is larger than the number of rows.
sample = df.sample(sample_size)
print("Sample size:", sample_size)

# Print the label and the table separately. Passing both to one print() call
# puts a separator space in front of the table's first line, which shifts the
# column headers out of alignment with the data rows beneath them.
print("Sample:")
print(sample)

# ALTERNATIVE: Replacement Samples
#
# If you want replacement samples (e.g., 10 samples & 3 replacements), increase
# the sample size to the total you want (e.g., 13). If that total is larger
# than the population, you will also need to pass the `replace=True` parameter.
# Note that `replace=True` allows the same row to be selected more than once,
# so leave it out whenever the population is large enough.
#
# # Sample Size: 25 + 5 replacement samples
# sample_size = 30
# sample = df.sample(sample_size, replace=True)