diff options
author | Christian Cleberg <hello@cleberg.net> | 2025-06-04 13:24:22 -0500 |
---|---|---|
committer | Christian Cleberg <hello@cleberg.net> | 2025-06-04 13:24:22 -0500 |
commit | 8bbac67df8450b021914725a756a029667b8f585 (patch) | |
tree | 1e5f13a29e6a4542fb5976dc74b15db2e89b2a32 | |
download | crumb-8bbac67df8450b021914725a756a029667b8f585.tar.gz crumb-8bbac67df8450b021914725a756a029667b8f585.tar.bz2 crumb-8bbac67df8450b021914725a756a029667b8f585.zip |
feat: initial commit
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | .projectile | 0 | ||||
-rw-r--r-- | README.org | 100 | ||||
-rw-r--r-- | crumb_extension/background.js | 26 | ||||
-rw-r--r-- | crumb_extension/manifest.json | 10 | ||||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | screenshots/devtools.png | bin | 0 -> 384302 bytes | |||
-rw-r--r-- | screenshots/history.png | bin | 0 -> 322836 bytes | |||
-rw-r--r-- | screenshots/search.png | bin | 0 -> 272389 bytes | |||
-rw-r--r-- | search.py | 27 | ||||
-rw-r--r-- | server.py | 50 |
11 files changed, 215 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f5e96db --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv
\ No newline at end of file diff --git a/.projectile b/.projectile new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/.projectile diff --git a/README.org b/README.org new file mode 100644 index 0000000..cfa7c13 --- /dev/null +++ b/README.org @@ -0,0 +1,100 @@ +#+TITLE: Crumb +#+AUTHOR: Christian Cleberg +#+OPTIONS: toc:nil + +Crumb is a local-only browser history tracker. It consists of two parts: a +WebExtension-based browser plugin and a local HTTP server. Crumb logs each +visited web page to a plaintext file in Org-mode format. + +* Components + +- Browser extension (WebExtension API, JavaScript) +- Local HTTP listener (Python + Flask) +- Plaintext log file (~/.crumb/history.org) + +* Functionality + +Each time a page is loaded in the browser, the extension sends the page metadata +to the local server. The server appends this data to a structured log file. + +Data captured per page: + +- Page title +- URL +- Timestamp (UTC) +- Hostname +- Path +- Optional: query string, tab ID, window ID, favicon URL + +The log is formatted as an Org-mode file, with one top-level heading per visit +and standard =:PROPERTIES:= metadata. + +* Requirements + +- Python 3.x +- Flask +- A Chromium-based or Firefox browser with extension developer mode enabled + +* Usage + +1. Start the local listener: + + #+BEGIN_SRC shell + python3 -m venv venv + source ./venv/bin/activate + pip install -r requirements.txt + python server.py + #+END_SRC + +2. Load the extension into your browser: + + - Enable Developer Mode + - Load the =crumb_extension/= directory + +3. Browse the web. Entries will appear in =~/.crumb/history.org= + +4. 
Search the log using: + + #+BEGIN_SRC shell + python search.py <term> + #+END_SRC + +* Example Log Entry + +#+BEGIN_SRC org +* Example Domain +:PROPERTIES: +:URL: http://example.com +:TIMESTAMP: 2025-06-04T20:31:01.123456 +:HOST: example.com +:PATH: / +:TAB: 143 +:WINDOW: 1 +:FAVICON: https://example.com/favicon.ico +:END: +#+END_SRC + +* Preview Images + +Screenshots of Crumb in use. + +#+CAPTION: Developer Console Output +[[./screenshots/devtools.png]] + +#+CAPTION: Org-Mode History Log +[[./screenshots/history.png]] + +#+CAPTION: Search Utility in Terminal +[[./screenshots/search.png]] + +* Limitations + +- Browser extensions cannot write files directly; communication requires the + local server. +- History is stored in plaintext and is not encrypted. You can bolt-on a + third-party encryption tool, if required. +- Data is stored locally and is never transmitted externally. + +* License + +GPL 3.0 License. See LICENSE file. diff --git a/crumb_extension/background.js b/crumb_extension/background.js new file mode 100644 index 0000000..c916c95 --- /dev/null +++ b/crumb_extension/background.js @@ -0,0 +1,26 @@ +chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { + if (changeInfo.status !== 'complete' || !tab.url.startsWith("http")) return; + + const url = new URL(tab.url); + + const payload = { + title: tab.title, + url: tab.url, + hostname: url.hostname, + path: url.pathname, + query: url.search, + tabId: tab.id, + windowId: tab.windowId, + favIconUrl: tab.favIconUrl || null + }; + + console.log("Crumb: Sending payload", payload); + + fetch("http://localhost:3555", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload) + }).catch(err => { + console.error("Crumb: Failed to reach server", err); + }); +});
\ No newline at end of file diff --git a/crumb_extension/manifest.json b/crumb_extension/manifest.json new file mode 100644 index 0000000..1895b57 --- /dev/null +++ b/crumb_extension/manifest.json @@ -0,0 +1,10 @@ +{ + "manifest_version": 3, + "name": "Crumb", + "version": "0.1", + "description": "Local history tracker that sends visit logs to a local server.", + "permissions": ["tabs", "history"], + "background": { + "service_worker": "background.js" + } +}
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8ab6294 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +flask
# Patch context carried on the flattened line this module was recovered from:
#   screenshots/devtools.png  new binary file, mode 100644, blob 170f033
#   screenshots/history.png   new binary file, mode 100644, blob b78b8fb
#   screenshots/search.png    new binary file, mode 100644, blob 97efdc4
#   search.py                 new file, mode 100644, original blob 098e392
"""Search the Crumb history log for visits that mention a term."""

import sys
import os
import re

LOG_PATH = os.path.expanduser("~/.crumb/history.org")

# A top-level Org heading is "* " at the very start of a line. Anchoring the
# split there keeps a "* " that appears inside a title or URL (for example
# "C* Algebra") from cutting an entry in half.
_HEADING_RE = re.compile(r"^\* ", re.MULTILINE)


def _split_entries(text):
    """Return each top-level entry in *text* with its "* " marker removed."""
    # Anything before the first heading is not an entry and is dropped.
    return _HEADING_RE.split(text)[1:]


def search_log(query, log_path=None):
    """Print every log entry that contains *query*, ignoring case.

    Args:
        query: Text to look for anywhere in an entry (heading or properties).
        log_path: Log file to read; defaults to ``LOG_PATH``.

    Prints "No history file found." when the log does not exist and
    "No matches found." when no entry contains the query. Returns ``None``.
    """
    path = LOG_PATH if log_path is None else log_path
    try:
        # errors="replace" keeps the search usable even if the log holds
        # bytes that are not valid UTF-8.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
    except FileNotFoundError:
        print("No history file found.")
        return

    needle = query.lower()
    found = 0
    for entry in _split_entries(text):
        if needle in entry.lower():
            print(f"* {entry.strip()}\n")
            found += 1

    if found == 0:
        print("No matches found.")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: search.py <search term>")
    else:
        # Join all arguments so an unquoted multi-word term is searched whole.
        search_log(" ".join(sys.argv[1:]))
# Patch context carried on the flattened line this module was recovered from:
#   server.py  new file, mode 100644, original blob 31c0d4c
"""Crumb local listener: append each visit reported by the extension to an Org log."""

from flask import Flask, request, jsonify, make_response
import os
import re
from datetime import datetime, timezone

app = Flask(__name__)
LOG_PATH = os.path.expanduser("~/.crumb/history.org")

os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

_WHITESPACE_RUN = re.compile(r"\s+")


def _one_line(value, default=""):
    """Return *value* as single-line text, or *default* when it is empty.

    Titles and URLs are controlled by the visited page. Collapsing every
    whitespace run (newlines included) to one space stops a crafted value
    from starting a new Org heading or property line in the log.
    """
    if value is None:
        return default
    return _WHITESPACE_RUN.sub(" ", str(value)).strip() or default


def _format_entry(data, timestamp):
    """Build the Org text for one visit from the payload dict *data*."""
    lines = [
        f"* {_one_line(data.get('title'), 'No Title')}",
        ":PROPERTIES:",
        f":URL: {_one_line(data.get('url'), 'No URL')}",
        f":TIMESTAMP: {timestamp}",
        f":HOST: {_one_line(data.get('hostname'))}",
        f":PATH: {_one_line(data.get('path'))}",
    ]
    # QUERY and FAVICON are optional and only written when non-empty.
    query = _one_line(data.get("query"))
    if query:
        lines.append(f":QUERY: {query}")
    lines.append(f":TAB: {_one_line(data.get('tabId'))}")
    lines.append(f":WINDOW: {_one_line(data.get('windowId'))}")
    favicon = _one_line(data.get("favIconUrl"))
    if favicon:
        lines.append(f":FAVICON: {favicon}")
    lines.append(":END:")
    return "\n".join(lines) + "\n\n"


@app.after_request
def add_cors_headers(response):
    """Attach the CORS headers the extension's cross-origin POST needs."""
    # NOTE(review): '*' lets any web page post entries to this listener.
    # Consider restricting it to the extension's origin once that is known.
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
    return response


@app.route('/', methods=['POST', 'OPTIONS'])
def log_visit():
    """Append one visit, sent as a JSON object, to the log file.

    Returns 204 on success and for CORS preflight requests, and 400 when
    the body is not a JSON object.
    """
    if request.method == 'OPTIONS':
        return '', 204

    # silent=True yields None for a missing or invalid JSON body, so a bad
    # request becomes a 400 here instead of an unhandled error further down.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify(error="expected a JSON object"), 400

    # Same naive-UTC ISO format the log has always used, without the
    # deprecated datetime.utcnow().
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # Write the entry in one call so it lands in the file as a unit.
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        f.write(_format_entry(data, timestamp))

    return '', 204


if __name__ == '__main__':
    app.run(port=3555)
\ No newline at end of file |