From a94928e85b0830be10a46055121f7e86775ddb79 Mon Sep 17 00:00:00 2001 From: Christian Cleberg Date: Sat, 19 Oct 2024 11:32:25 -0500 Subject: move sample script to sampling dir --- sample.py | 30 ------------------------------ sampling/sample.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 30 deletions(-) delete mode 100644 sample.py create mode 100644 sampling/sample.py diff --git a/sample.py b/sample.py deleted file mode 100644 index a2435d8..0000000 --- a/sample.py +++ /dev/null @@ -1,30 +0,0 @@ -# Import packages -import pandas as pd - -# Define the sample size -sample_size = 25 - -# Import the data to a pandas DataFrame -df = pd.read_csv("FILENAME_GOES_HERE.csv") - -# ALTERNATIVE: If you use Excel, use this instead. Supports xls, xlsx, xlsm, -# xlsb, odf, ods and odt file extensions. -# df = pd.read_excel("FILENAME_GOES_HERE.xlsx") - -# Print totals prior to sampling -print("Dataframe size (rows, columns): ", df.shape) - -# Sample -sample = df.sample(sample_size) -print("Sample size: ", sample_size) -print("Sample:\n", sample) - -# ALTERNATIVE: Replacement Samples -# -# If you want replacement samples (e.g., 10 samples & 3 replacements), you will -# need to increase sample size to the total you want (e.g., 13). If that is -# larger than the population, you will need to use the `replace=True` parameter. -# -# # Sample Size: 25 + 5 replacement samples -# sample_size = 30 -# sample = df.sample(30, replace=True) diff --git a/sampling/sample.py b/sampling/sample.py new file mode 100644 index 0000000..a2435d8 --- /dev/null +++ b/sampling/sample.py @@ -0,0 +1,30 @@ +# Import packages +import pandas as pd + +# Define the sample size +sample_size = 25 + +# Import the data to a pandas DataFrame +df = pd.read_csv("FILENAME_GOES_HERE.csv") + +# ALTERNATIVE: If you use Excel, use this instead. Supports xls, xlsx, xlsm, +# xlsb, odf, ods and odt file extensions. +# df = pd.read_excel("FILENAME_GOES_HERE.xlsx") + +# Print totals prior to sampling +print("Dataframe size (rows, columns): ", df.shape) + +# Sample +sample = df.sample(sample_size) +print("Sample size: ", sample_size) +print("Sample:\n", sample) + +# ALTERNATIVE: Replacement Samples +# +# If you want replacement samples (e.g., 10 samples & 3 replacements), you will +# need to increase sample size to the total you want (e.g., 13). If that is +# larger than the population, you will need to use the `replace=True` parameter. +# +# # Sample Size: 25 + 5 replacement samples +# sample_size = 30 +# sample = df.sample(30, replace=True) -- cgit v1.2.3-70-g09d2