mirror of
https://github.com/alabianca/OT_Reportv2.git
synced 2025-12-16 19:35:02 -06:00
worker thread and cleanup
This commit is contained in:
156
.idea/workspace.xml
generated
156
.idea/workspace.xml
generated
@@ -2,8 +2,14 @@
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="7bbe5005-15ef-4d5e-b36b-58084d0f70eb" name="Default Changelist" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/google/gmailWorker.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/HtmlReader.py" beforeDir="false" afterPath="$PROJECT_DIR$/HtmlReader.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/Untitled.ipynb" beforeDir="false" afterPath="$PROJECT_DIR$/Untitled.ipynb" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/config.json" beforeDir="false" afterPath="$PROJECT_DIR$/config.json" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/events.csv" beforeDir="false" afterPath="$PROJECT_DIR$/events.csv" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/google/gmailApi.py" beforeDir="false" afterPath="$PROJECT_DIR$/google/gmailApi.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/htmlParser.py" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
@@ -24,13 +30,13 @@
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.open">
|
||||
<counts>
|
||||
<entry key="csv" value="1" />
|
||||
<entry key="csv" value="2" />
|
||||
<entry key="gitattributes" value="2" />
|
||||
<entry key="gitignore" value="1" />
|
||||
<entry key="html" value="13" />
|
||||
<entry key="ipynb" value="3" />
|
||||
<entry key="json" value="7" />
|
||||
<entry key="py" value="12" />
|
||||
<entry key="py" value="15" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.open">
|
||||
@@ -38,8 +44,8 @@
|
||||
<entry key="HTML" value="13" />
|
||||
<entry key="IPNB" value="3" />
|
||||
<entry key="JSON" value="7" />
|
||||
<entry key="PLAIN_TEXT" value="4" />
|
||||
<entry key="Python" value="12" />
|
||||
<entry key="PLAIN_TEXT" value="5" />
|
||||
<entry key="Python" value="15" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.edit">
|
||||
@@ -49,7 +55,7 @@
|
||||
<entry key="gitignore" value="45" />
|
||||
<entry key="html" value="35" />
|
||||
<entry key="json" value="28" />
|
||||
<entry key="py" value="7376" />
|
||||
<entry key="py" value="9380" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.edit">
|
||||
@@ -57,56 +63,14 @@
|
||||
<entry key="HTML" value="35" />
|
||||
<entry key="JSON" value="28" />
|
||||
<entry key="PLAIN_TEXT" value="172" />
|
||||
<entry key="Python" value="7376" />
|
||||
<entry key="Python" value="9380" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.vcs.git.usages" />
|
||||
</session>
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/main.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1690">
|
||||
<caret line="130" column="23" selection-start-line="130" selection-start-column="23" selection-end-line="130" selection-end-column="23" />
|
||||
<folding>
|
||||
<element signature="e#2#38#0" expanded="true" />
|
||||
<marker date="1542684898017" expanded="true" signature="345:474" ph="..." />
|
||||
<marker date="1542684898017" expanded="true" signature="1506:2313" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/HtmlReader.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1703">
|
||||
<caret line="132" column="13" selection-start-line="132" selection-start-column="13" selection-end-line="132" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#2#31#0" expanded="true" />
|
||||
<marker date="1542684886113" expanded="true" signature="792:941" ph="..." />
|
||||
<marker date="1542684886113" expanded="true" signature="2066:2075" ph="..." />
|
||||
<marker date="1542684886113" expanded="true" signature="2066:2635" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/google/gmailApi.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="533">
|
||||
<caret line="49" column="82" selection-start-line="49" selection-start-column="82" selection-end-line="49" selection-end-column="82" />
|
||||
<folding>
|
||||
<element signature="e#1#38#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300" />
|
||||
</component>
|
||||
<component name="FileTemplateManagerImpl">
|
||||
<option name="RECENT_TEMPLATES">
|
||||
@@ -151,10 +115,11 @@
|
||||
<option value="$PROJECT_DIR$/.gitignore" />
|
||||
<option value="$PROJECT_DIR$/htmlFiles/OTReport_166a63095fc16625.html" />
|
||||
<option value="$PROJECT_DIR$/.gitattributes" />
|
||||
<option value="$PROJECT_DIR$/Untitled.ipynb" />
|
||||
<option value="$PROJECT_DIR$/google/gmailWorker.py" />
|
||||
<option value="$PROJECT_DIR$/main.py" />
|
||||
<option value="$PROJECT_DIR$/google/gmailApi.py" />
|
||||
<option value="$PROJECT_DIR$/HtmlReader.py" />
|
||||
<option value="$PROJECT_DIR$/main.py" />
|
||||
<option value="$PROJECT_DIR$/Untitled.ipynb" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
@@ -178,7 +143,7 @@
|
||||
<path>
|
||||
<item name="ot_report_v2" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="ot_report_v2" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="htmlFilesv2" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="google" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
</expand>
|
||||
<select />
|
||||
@@ -235,7 +200,7 @@
|
||||
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
||||
<window_info anchor="bottom" id="TODO" order="6" />
|
||||
<window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
|
||||
<window_info active="true" anchor="bottom" id="Terminal" order="8" visible="true" weight="0.39638555" />
|
||||
<window_info active="true" anchor="bottom" id="Terminal" order="8" visible="true" weight="0.19518073" />
|
||||
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
|
||||
<window_info anchor="bottom" id="Python Console" order="10" />
|
||||
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
|
||||
@@ -247,9 +212,6 @@
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/google/secrets.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/google/config.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="26">
|
||||
@@ -265,13 +227,6 @@
|
||||
<entry file="file://$PROJECT_DIR$/token.json">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/htmlParser.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="169">
|
||||
<caret line="13" column="11" selection-start-line="13" selection-start-column="11" selection-end-line="13" selection-end-column="11" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/test.html" />
|
||||
<entry file="file://$PROJECT_DIR$/google/Errors.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
@@ -338,43 +293,72 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/events.csv">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/main.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1690">
|
||||
<caret line="130" column="23" selection-start-line="130" selection-start-column="23" selection-end-line="130" selection-end-column="23" />
|
||||
<state relative-caret-position="-297">
|
||||
<caret line="95" lean-forward="true" selection-start-line="95" selection-end-line="95" />
|
||||
<folding>
|
||||
<element signature="e#2#38#0" expanded="true" />
|
||||
<marker date="1542684898017" expanded="true" signature="345:474" ph="..." />
|
||||
<marker date="1542684898017" expanded="true" signature="1506:2313" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/HtmlReader.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1703">
|
||||
<caret line="132" column="13" selection-start-line="132" selection-start-column="13" selection-end-line="132" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#2#31#0" expanded="true" />
|
||||
<marker date="1542684886113" expanded="true" signature="792:941" ph="..." />
|
||||
<marker date="1542684886113" expanded="true" signature="2066:2075" ph="..." />
|
||||
<marker date="1542684886113" expanded="true" signature="2066:2635" ph="..." />
|
||||
<marker date="1543380484795" expanded="true" signature="535:749" ph="..." />
|
||||
<marker date="1543380484795" expanded="true" signature="2330:3137" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/google/gmailApi.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="533">
|
||||
<caret line="49" column="82" selection-start-line="49" selection-start-column="82" selection-end-line="49" selection-end-column="82" />
|
||||
<state relative-caret-position="520">
|
||||
<caret line="40" column="59" selection-start-line="40" selection-start-column="59" selection-end-line="40" selection-end-column="59" />
|
||||
<folding>
|
||||
<element signature="e#1#38#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/google/gmailWorker.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="78">
|
||||
<caret line="6" column="28" selection-start-line="6" selection-start-column="28" selection-end-line="6" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#0#28#0" expanded="true" />
|
||||
<marker date="1543379726813" expanded="true" signature="95:100" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/google/secrets.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="13">
|
||||
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/HtmlReader.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="202">
|
||||
<caret line="108" column="91" selection-start-line="108" selection-start-column="91" selection-end-line="108" selection-end-column="91" />
|
||||
<folding>
|
||||
<element signature="e#2#31#0" expanded="true" />
|
||||
<marker date="1543381166754" expanded="true" signature="792:941" ph="..." />
|
||||
<marker date="1543381166754" expanded="true" signature="2066:2075" ph="..." />
|
||||
<marker date="1543381166754" expanded="true" signature="2066:3163" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/htmlParser.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="156">
|
||||
<caret line="13" column="11" selection-start-line="13" selection-start-column="11" selection-end-line="13" selection-end-column="11" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/events.csv">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state>
|
||||
<caret selection-end-column="88" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
||||
@@ -62,8 +62,8 @@ class HtmlReader:
|
||||
|
||||
tbody = workout_summary_table.contents[0]
|
||||
|
||||
v1_id = tbody.contents[0].contents[0].text
|
||||
|
||||
v1_id = tbody.contents[0].contents[0].text
|
||||
|
||||
# the OT_REPORT template has changed slightly over the past few months
|
||||
# the summary is in different places for each type of template
|
||||
@@ -100,6 +100,13 @@ class HtmlReader:
|
||||
|
||||
|
||||
|
||||
# reads all html files in a given directory and scrapes them
|
||||
# we return the aggregated data as a dictionary called 'event'
|
||||
# an event has the following keys: calories,splat_pts,steps,date,time,coach,template_version,avg_heart_rate,peak_heart_rate
|
||||
# these keys are currently found in 3 different sections
|
||||
# 1. this_class: the THIS CLASS row in the summary table
|
||||
# 2. summary: data about date, time and coach
|
||||
# 3. cardio: max and average heart rate data found in the tiles of the email template
|
||||
def read_all(self, directory):
|
||||
|
||||
files = glob.glob(directory)
|
||||
|
||||
100
Untitled.ipynb
vendored
100
Untitled.ipynb
vendored
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
|
||||
{"last_run": "2018-11-26"}
|
||||
{"last_run": "2018-11-27"}
|
||||
BIN
google/__pycache__/gmailWorker.cpython-37.pyc
Normal file
BIN
google/__pycache__/gmailWorker.cpython-37.pyc
Normal file
Binary file not shown.
@@ -38,44 +38,46 @@ class GmailApi:
|
||||
self.service = build('gmail', 'v1', http=creds.authorize(Http()))
|
||||
|
||||
|
||||
# get all labels associated with the authenticated user
|
||||
def get_labels(self):
    """Return the label entries Gmail lists for the authenticated user.

    The request is issued for userId "me". An empty list is returned when
    the response carries no 'labels' key.
    """
    request = self.service.users().labels().list(userId="me")
    response = request.execute()
    return response.get('labels', [])
|
||||
|
||||
|
||||
|
||||
# query gmail for emails.
|
||||
# query format is standard gmail search queries
|
||||
# e.g. 'after:11/27/2018' -> returns emails that were received after the given date
|
||||
def get_ot_messages(self, query=''):
    """Return the messages with the OT label that match ``query``.

    ``query`` is passed to the Gmail messages.list call as ``q``. An empty
    query is treated as "pull all data", so ids that are already saved
    locally do not suppress the result.

    Returns:
        The 'messages' list of the API response.

    Raises:
        Errors.NoMessagesFoundException: when the search returns nothing,
            or when a non-empty query matches only ids reported by
            load_already_parsed_message_ids().
    """
    # gets the ids of all messages that match the OT LabelId and provided query
    results = self.service.users().messages().list(
        userId="me", labelIds=[OT_LABEL_ID], q=query).execute()

    # An empty search result may omit the 'messages' key altogether; default
    # to an empty list so the "nothing found" case reaches the exception
    # below instead of failing with KeyError.
    messages = results.get('messages', [])

    saved_templates = load_already_parsed_message_ids()

    # if no query is provided we default to pull all data; otherwise at least
    # one id must be missing from the locally saved templates
    has_new_messages = query == '' or any(
        message['id'] not in saved_templates for message in messages)

    # if no new messages are found in any case raise error to catch accordingly
    nothing_found = not messages or results.get('resultSizeEstimate') == 0
    if nothing_found or not has_new_messages:
        raise Errors.NoMessagesFoundException(userId='me', labelIds=[OT_LABEL_ID], q=query)

    # NOTE(review): this counts every returned message, not only the ones
    # that are missing locally - confirm the wording is intended.
    log_msg = "Found {} new OT Email(s).".format(len(messages))
    print(log_msg)

    return messages
|
||||
|
||||
|
||||
|
||||
# get a single message by message_id
|
||||
def get_message(self, message_id):
|
||||
m_res = self.service.users().messages().get(id=message_id, userId='me').execute()
|
||||
|
||||
|
||||
20
google/gmailWorker.py
Normal file
20
google/gmailWorker.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from threading import Thread
|
||||
from google.gmailApi import GmailApi
|
||||
|
||||
|
||||
|
||||
class GmailWorker(Thread):
    """Thread that downloads Gmail messages for tasks taken from a queue.

    Each task is a ``(message_id, parse)`` tuple. The worker fetches the
    message with ``GmailApi.get_message(message_id)`` and hands the result to
    ``parse(content, message_id)``.
    """

    def __init__(self, queue):
        """Store the task queue and create this worker's own GmailApi client."""
        Thread.__init__(self)
        self.queue = queue
        self.gmail = GmailApi()

    def run(self):
        """Process tasks from the queue forever; this method never returns.

        A failing task is reported on stderr and skipped. Every task is
        marked done, so a ``queue.join()`` in the producer cannot block
        forever because one download or parse raised.
        """
        import traceback  # local import: only needed on the error path

        while True:
            message_id, parse = self.queue.get()
            try:
                content = self.gmail.get_message(message_id)
                parse(content, message_id)
            except Exception:
                # Thread boundary: an uncaught exception would end this
                # worker and leave the remaining tasks to fewer threads.
                traceback.print_exc()
            finally:
                # Must run exactly once per get(), even after a failure.
                self.queue.task_done()
|
||||
@@ -1,20 +0,0 @@
|
||||
from html.parser import HTMLParser
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# class MyHTMLParser(HTMLParser):
|
||||
# def handle_starttag(self, tag, attrs):
|
||||
# print("Encountered a start tag:", tag)
|
||||
|
||||
# def handle_endtag(self, tag):
|
||||
# print("Encountered an end tag :", tag)
|
||||
|
||||
# def handle_data(self, data):
|
||||
# print("Encountered some data :", data)
|
||||
|
||||
# Debug helper: parse test.html and print the prettified markup.
with open('test.html') as file:
    # Name the parser explicitly. Without it bs4 picks whichever parser
    # happens to be installed and warns, so output can differ per machine.
    soup = BeautifulSoup(file, 'html.parser')

print(soup.prettify())
|
||||
|
||||
|
||||
|
||||
|
||||
32
main.py
32
main.py
@@ -2,7 +2,9 @@
|
||||
|
||||
from google.gmailApi import GmailApi
|
||||
from google.Errors import NoMessagesFoundException
|
||||
from google.gmailWorker import GmailWorker
|
||||
from pytime import pytime
|
||||
from queue import Queue
|
||||
import HtmlReader
|
||||
import base64
|
||||
import json
|
||||
@@ -10,6 +12,9 @@ import sys
|
||||
import csv
|
||||
|
||||
|
||||
#get the config
|
||||
#currently only returns a json object with a single key
|
||||
#the key will tell us when we last ran the program
|
||||
def get_config():
|
||||
config_file = open('config.json')
|
||||
config = json.load(config_file)
|
||||
@@ -24,7 +29,8 @@ def save_config(config):
|
||||
config_file.close()
|
||||
|
||||
|
||||
|
||||
#write the file to disk.
|
||||
#name is generally of the format 'OTReport_<messageid>.html'
|
||||
def write_to_html_file(html, name):
|
||||
path = "./htmlFilesv2/{}".format(name)
|
||||
file = open(path, "w")
|
||||
@@ -33,6 +39,7 @@ def write_to_html_file(html, name):
|
||||
file.close()
|
||||
|
||||
|
||||
#parses the raw byte content of an OT Email and writes it to ./htmlFilesv2 as an html file
|
||||
def parse_message(msg_json, msg_id):
|
||||
message_parts = msg_json["payload"]["parts"]
|
||||
|
||||
@@ -44,24 +51,40 @@ def parse_message(msg_json, msg_id):
|
||||
file_name = 'OTReport_{}.html'.format(msg_id)
|
||||
write_to_html_file(result, file_name)
|
||||
|
||||
|
||||
def get_last_run_time(timestamp):
    """Return the date portion of ``pytime.before(timestamp, '1d')``.

    The pytime result is converted to a string and everything from the first
    space onwards is discarded.
    """
    # presumably one day before `timestamp` - confirm against pytime docs
    shifted = pytime.before(timestamp, '1d')
    date_part, _, _ = str(shifted).partition(' ')
    return date_part
|
||||
|
||||
|
||||
#pulls the gmail data
|
||||
#creates 4 worker threads to speed up the download and parsing of emails
|
||||
def pull_gmail_data(query=''):
    """Fetch the OT messages matching ``query`` and process them on workers.

    The message list comes from ``GmailApi.get_ot_messages(query)``; any
    exception it raises propagates to the caller. Each message id is queued
    together with ``parse_message`` for the GmailWorker threads, and the call
    returns once every queued task has been marked done.
    """
    gmail = GmailApi()
    queue = Queue()

    messages = gmail.get_ot_messages(query)

    # start 4 worker threads to speed up the download and parsing of emails
    for _ in range(4):
        print('starting worker')
        worker = GmailWorker(queue)
        # daemon threads: the workers loop forever and must not keep the
        # process alive after the main thread finishes
        worker.daemon = True
        worker.start()

    # push task into the queue as a tuple
    # second item in the task tuple is always the parse_message function
    # defined above; the workers do the download themselves, so nothing is
    # fetched or parsed on this thread
    for message in messages:
        queue.put((message['id'], parse_message))

    # block until the workers have marked every queued task as done
    queue.join()
|
||||
|
||||
def do_latest(config):
|
||||
last_run = get_last_run_time(config['last_run'])
|
||||
@@ -69,6 +92,8 @@ def do_latest(config):
|
||||
|
||||
pull_gmail_data(query)
|
||||
|
||||
|
||||
|
||||
def write_to_csv(events):
|
||||
with open('events.csv', 'w', newline='') as csvFile:
|
||||
field_names = ['calories', 'splat_pts', 'steps', 'date', 'time', 'coach', 'template_version', 'avg_heart_rate', 'peak_heart_rate']
|
||||
@@ -110,6 +135,7 @@ def main():
|
||||
last_run = get_last_run_time(config['last_run'])
|
||||
query = 'after:{}'.format(last_run)
|
||||
|
||||
|
||||
#try to find data
|
||||
try:
|
||||
pull_gmail_data(query)
|
||||
|
||||
Reference in New Issue
Block a user