From 1627615c291024f159f6699768fbc13662e83bbd Mon Sep 17 00:00:00 2001 From: Christian Cleberg Date: Wed, 24 Jan 2024 13:36:20 -0600 Subject: update path and add method to delete extra header rows --- ingest.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ingest.py b/ingest.py index daa7041..f864d78 100644 --- a/ingest.py +++ b/ingest.py @@ -4,7 +4,7 @@ import sqlite3 import os # Create the database file -connection = sqlite3.connect('./raw_data/ingress.db') +connection = sqlite3.connect('./raw_data/test.db') # Creating a cursor object to execute SQL queries cursor = connection.cursor() @@ -34,14 +34,14 @@ create_table = '''CREATE TABLE incidents( cursor.execute(create_table) # Point to the data directory -directory = os.fsencode("../raw_data/") +directory = os.fsencode("./raw_data/") # Loop through all raw data files for file in os.listdir(directory): filename = os.fsdecode(file) if filename.endswith(".csv"): # Opening the file - file = open("../raw_data/" + filename) + file = open("./raw_data/" + filename) # Reading the contents of the file contents = csv.reader(file) @@ -58,6 +58,10 @@ for file in os.listdir(directory): else: continue +# Delete extra copies of the header row that were inserted +delete_headers = "DELETE FROM incidents WHERE rb = 'RB Number'" +cursor.execute(delete_headers) + # Test query to see if the data loaded select_all = "SELECT * FROM incidents" rows = cursor.execute(select_all).fetchall() -- cgit v1.2.3-70-g09d2