Pontifications

  • See the SmartThings Community thread “Managed to extract Hub live log for my usage via python script”
  • Amazing what you can do with scrapers :-) I bet I can reuse this code when scraping Instagram, Flickr, and other places where the APIs are going to stop functioning.
  • I only copied and pasted the code below because I don’t trust S*msung to keep the SmartThings website up.

Code by Philippe_Portes:

import platform
from bs4 import BeautifulSoup
from selenium import webdriver
import datetime
import time
import re
import difflib

login_l = 'yourSTusername'
password_l = 'yourSTpassword'

# helper function to turn a regex findall() result into a string
def xstr(s):
    if s is None or len(s) == 0:
        return ""
    else:
        return ''.join(s)

# PhantomJS files have different extensions
# under different operating systems
if platform.system() == 'Windows':
    PHANTOMJS_PATH = './phantomjs.exe'
else:
    PHANTOMJS_PATH = './phantomjs'

# Initialize difflib for later use (note: d is never actually used below)
d = difflib.Differ()

# here we'll use the pseudo-browser PhantomJS,
# but it can be replaced with browser = webdriver.Firefox(),
# which is good for debugging.
browser = webdriver.PhantomJS(PHANTOMJS_PATH)
browser.get('https://graph-na02-useast1.api.smartthings.com/login/auth')
username = browser.find_element_by_name('j_username')
password = browser.find_element_by_name('j_password')
username.send_keys(login_l)
password.send_keys(password_l)

browser.find_element_by_name('_action_Log in').click()
# time.sleep(10)  # to be replaced by page-load detection to ensure the login succeeded
print "Authenticating :",

while login_l not in browser.page_source:
    print ".",
print ""
print "Authenticated as ", login_l
browser.get('https://graph-na02-useast1.api.smartthings.com/ide/logs')

# variables needed for the log and page parsing management
first_log_trace = ''
log_trace = ''
old_page = ''

# CSV file names
csv_file_name = ''
last_file_name = ''

print "Parsing...",

while True:
    time.sleep(10)  # sleep to avoid the socket getting messed up while in WAIT state

    # Check whether the page content changed (= new logs)
    while browser.page_source == old_page:
        pass

    # new page content, so we'll have to write things to the log file
    new_log = 1

    # Prepare the file name, based on the date.
    timestr = time.strftime("%Y%m%d")

    # the first logs in this file might include the last minutes of the day before
    logfile = open(timestr + ".csv", "a+")

    # If the file name is new, add the csv header.
    if last_file_name != timestr + '.csv':
        # write the csv header (the original code omitted the trailing newline)
        logfile.write('"Log_Class";"Device_Name";"Local_Log_Time";"Log_Message"\n')
        last_file_name = timestr + ".csv"

    # Prepare to parse the page
    new_page = browser.page_source

    soup = BeautifulSoup(new_page, "html.parser")

    # keep track of the page content to avoid reparsing the same page
    old_page = new_page

    # Map the href used by the ST logs to each device name, so the exported
    # logs can carry the device name in clear
    devicename_dic = {}
    # the devices already traced are added as filter options at the top of the
    # window; checking every <a class=filter> element gives the correspondence
    # between the href URLs seen later and the ST device names
    div_tools = soup.findAll('div', {'class': 'tools'})
    # loop over the device names
    for child in div_tools:
        device_href_name = child.findAll('li')
        for href in device_href_name:
            device_href = re.search('.*href="(.*)".*', str(href.a))
            device_name = re.search('.*href=.*>(.*)</a>', str(href.a))
            if device_name is not None:
                devicename_dic[str(device_href.group(1))] = str(device_name.group(1))

    # search for the div sections whose style attribute marks them as the visible logs
    div_class_body = soup.findAll('div', {'style': 'display: block;'})

    for child in div_class_body:
        log_class = xstr(re.findall('<div class="(.*?)".*', str(child)))
        # extract the url from the console log and match it to the device name we stored
        device_href = xstr(re.findall('.*href="(.*)".*', str(child)))
        # extract the time; it will be in the format (H)H:(M)M:(S)S AM/PM TZ
        log_time = xstr(re.findall('<span class="time">(.*?)</span>', str(child)))
        # extract the log message text
        log_msg = xstr(re.findall('.*msg">(.*?)</span>', str(child), re.MULTILINE))
        # build the csv line. Because ST displays the newest events first, each
        # group of lines is written to the file in new->old order, for example:
        # [c
        #  b
        #  a] ==> first set of logs
        # [f
        #  e] ==> second set of logs
        # This is not really important, as the program using the csv file can
        # reorder it. Note that SmartThings doesn't add the day to the timestamp.
        log_trace = '"' + log_class + '";"' + devicename_dic.get(device_href, "") + '";"' + log_time + '";"' + log_msg + '"\n'

        # events being in reverse date order, stop writing events already written last time
        if log_trace == first_log_trace:
            new_log = 0
        # first time parsing? then remember the newest event so past events are skipped next time
        if first_log_trace == '':
            first_log_trace = log_trace
            new_log = 1
        # got new logs? print them and write them to the file
        if new_log == 1:
            print log_trace
            logfile.write(log_trace)

    # close the file at the end of each pass (the original's unreachable
    # logfile.close was also missing its parentheses)
    logfile.close()
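
A note from today: PhantomJS has been abandoned, and newer Selenium releases dropped both PhantomJS support and the find_element_by_* helpers. If you ever need to revive this script, the login portion might look roughly like the sketch below, using headless Chrome and Selenium 4's find_element() API; this is an untested sketch, with the URL and form-field names simply carried over from the script above.

from selenium import webdriver
from selenium.webdriver.common.by import By

# Untested sketch: the same login flow as above, with headless Chrome
# standing in for PhantomJS and the Selenium 4 element-lookup API.
options = webdriver.ChromeOptions()
options.add_argument('--headless=new')  # run Chrome without a window
browser = webdriver.Chrome(options=options)

browser.get('https://graph-na02-useast1.api.smartthings.com/login/auth')
browser.find_element(By.NAME, 'j_username').send_keys(login_l)
browser.find_element(By.NAME, 'j_password').send_keys(password_l)
browser.find_element(By.NAME, '_action_Log in').click()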
Example of the CSV log file I got:
"Log_Class";"Device_Name";"Local_Log_Time";"Log_Message"
"log debug";"Arrival Sensor guest";"3:55:14 PM PST";"Creating battery event for voltage=2.2V: Arrival Sensor guest battery is 70%"
