aboutsummaryrefslogtreecommitdiff
path: root/scripts/load.py
blob: ea362cba3cc5b537d906c49957feeda70408c744 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Import required modules
import csv
import sqlite3
import os

# Create the database file
connection = sqlite3.connect('../raw_data/ingress.db')

# Creating a cursor object to execute SQL queries
cursor = connection.cursor()

# Table Definition
# rb          = RB Number
# date        = Reported Date
# time        = Reported Time
# description = Statute/Ordinance Description
# location    = Occurred Location
# district    = Occurred District
# lat         = Occurred Block LAT
# lon         = Occurred Block LON
create_table = '''CREATE TABLE incidents(
				id INTEGER PRIMARY KEY AUTOINCREMENT,
                rb TEXT NOT NULL,
				date TEXT NOT NULL,
				time TEXT NOT NULL,
                description TEXT NOT NULL,
                location TEXT NOT NULL,
                district TEXT NOT NULL,
                lat REAL NOT NULL,
                lon REAL NOT NULL);
				'''

# Create the table
cursor.execute(create_table)

# Point to the data directory
directory = os.fsencode("../raw_data/")

# Loop through all raw data files
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith(".csv"): 
        # Opening the file
        file = open("../raw_data/" + filename)

        # Reading the contents of the file
        contents = csv.reader(file)

        # SQL query to insert data into the
        # table
        insert_records = "INSERT INTO incidents (rb, date, time, description, location, district, lat, lon) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"

        # Importing the contents of the file 
        # into our table
        cursor.executemany(insert_records, contents)
        print("Inserted data from: ", filename)
        continue
    else:
        continue

# Test query to see if the data loaded
select_all = "SELECT * FROM incidents"
rows = cursor.execute(select_all).fetchall()

# Output to the console screen
for r in rows:
	print(r)

# Commit the changes
connection.commit()

# Close the database connection
connection.close()