aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Cleberg <hello@cleberg.net>2024-01-24 13:36:20 -0600
committerChristian Cleberg <hello@cleberg.net>2024-01-24 13:36:20 -0600
commit1627615c291024f159f6699768fbc13662e83bbd (patch)
treeaca9c8d62ca6699e1121b9bd0ce2a3956e8d6290
parent826a705dfa9694f90357e33fa862f8e1bff2e042 (diff)
downloadomaha-incidents-1627615c291024f159f6699768fbc13662e83bbd.tar.gz
omaha-incidents-1627615c291024f159f6699768fbc13662e83bbd.tar.bz2
omaha-incidents-1627615c291024f159f6699768fbc13662e83bbd.zip
update path and add method to delete extra header rows
-rw-r--r--ingest.py10
1 files changed, 7 insertions, 3 deletions
diff --git a/ingest.py b/ingest.py
index daa7041..f864d78 100644
--- a/ingest.py
+++ b/ingest.py
@@ -4,7 +4,7 @@ import sqlite3
import os
# Create the database file
-connection = sqlite3.connect('./raw_data/ingress.db')
+connection = sqlite3.connect('./raw_data/test.db')
# Creating a cursor object to execute SQL queries
cursor = connection.cursor()
@@ -34,14 +34,14 @@ create_table = '''CREATE TABLE incidents(
cursor.execute(create_table)
# Point to the data directory
-directory = os.fsencode("../raw_data/")
+directory = os.fsencode("./raw_data/")
# Loop through all raw data files
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
# Opening the file
- file = open("../raw_data/" + filename)
+ file = open("./raw_data/" + filename)
# Reading the contents of the file
contents = csv.reader(file)
@@ -58,6 +58,10 @@ for file in os.listdir(directory):
else:
continue
+# Delete extra copies of the header row that were inserted
+delete_headers = "DELETE FROM incidents WHERE rb = 'RB Number'"
+cursor.execute(delete_headers)
+
# Test query to see if the data loaded
select_all = "SELECT * FROM incidents"
rows = cursor.execute(select_all).fetchall()