diff options
author | Christian Cleberg <hello@cleberg.net> | 2024-10-19 11:26:23 -0500 |
---|---|---|
committer | Christian Cleberg <hello@cleberg.net> | 2024-10-19 11:26:23 -0500 |
commit | 9bf4bc59ee673fb72e56104aec94282ec55bb7ee (patch) | |
tree | c6ca131bf66368e44a22178b8312d6d130c0406c /sample.py | |
parent | e273f1a32f4d3372b1692645b9704387d5eacc77 (diff) | |
download | audit-tools-9bf4bc59ee673fb72e56104aec94282ec55bb7ee.tar.gz audit-tools-9bf4bc59ee673fb72e56104aec94282ec55bb7ee.tar.bz2 audit-tools-9bf4bc59ee673fb72e56104aec94282ec55bb7ee.zip |
add sample.py
Diffstat (limited to 'sample.py')
-rw-r--r-- | sample.py | 30 |
1 files changed, 30 insertions, 0 deletions
"""Draw a simple random sample of rows from a tabular data file.

Set ``sample_size`` and the input filename below, then run the script. It
prints the shape of the full data set, the sample size, and the sampled rows.
"""

# Import packages
import pandas as pd

# Define the sample size
sample_size = 25


def draw_sample(df, n, replace=False, random_state=None):
    """Return ``n`` randomly selected rows of ``df``.

    Args:
        df: The pandas DataFrame to sample from (the population).
        n: Number of rows to draw.
        replace: If True, the same row may be drawn more than once. This is
            required when ``n`` is larger than the number of rows in ``df``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            selection can be reproduced later. ``None`` (the default) keeps
            every run random.

    Returns:
        A DataFrame containing the sampled rows.

    Raises:
        ValueError: If ``n`` exceeds the number of rows in ``df`` and
            ``replace`` is False.
    """
    population = len(df)
    # pandas raises ValueError for this case too; checking first lets the
    # message state the actual sizes and the way out.
    if n > population and not replace:
        raise ValueError(
            f"Cannot draw {n} rows from a population of {population} "
            "without replacement; lower the sample size or pass replace=True."
        )
    return df.sample(n, replace=replace, random_state=random_state)


def main():
    """Load the data file, draw the sample, and print the results."""
    # Import the data to a pandas DataFrame
    df = pd.read_csv("FILENAME_GOES_HERE.csv")

    # ALTERNATIVE: If you use Excel, use this instead. Supports xls, xlsx,
    # xlsm, xlsb, odf, ods and odt file extensions.
    # df = pd.read_excel("FILENAME_GOES_HERE.xlsx")

    # Print totals prior to sampling
    print("Dataframe size (rows, columns): ", df.shape)

    # Sample
    sample = draw_sample(df, sample_size)
    print("Sample size: ", sample_size)
    print("Sample:\n", sample)

    # ALTERNATIVE: Replacement Samples
    #
    # If you want replacement samples (e.g., 10 samples & 3 replacements),
    # you will need to increase sample size to the total you want (e.g., 13).
    # If that is larger than the population, you will need to use the
    # `replace=True` parameter.
    #
    # # Sample Size: 25 + 5 replacement samples
    # Set `sample_size = 30` at the top of the file, then:
    # sample = draw_sample(df, sample_size, replace=True)


# Only touch the filesystem when run as a script, so importing this module
# (e.g. to reuse draw_sample) has no side effects.
if __name__ == "__main__":
    main()