# getnoaa.pl
# Jim Miller, 9:31 PM Tue March 21, 2023
use LWP::Simple;
use Time::Local;
use Date::Calc qw(Decode_Date_US Now Today);
use Win32::ODBC;
use Date::Manip qw(ParseDate DateCalc UnixDate);
use DateTime;
use JSON;
#use strict;
my ($WindDir, $WindAvg, $WindMax, $TempAvg, $DewPoint, $BPAvg);
my $googleSheet_data;
my %shortNames;
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Subroutines
sub cS {
# clearString (cS)
# This prepares values before writing to the Google sheet.
# It translates the Null to an empty string.
my $stringValue = $_[0];
if ($stringValue eq "Null") {
$stringValue = "";
}
return $stringValue;
}
sub WriteToMDB {
my $DateTimeStamp;
my $LiteralDateTimeStamp;
my ($Waconia_Year, $Waconia_Month, $Waconia_Day, $Waconia_Hour, $Waconia_Min, $Waconia_Sec);
my @newRow_googleSheet;
($Waconia_Hour, $Waconia_Min, $Waconia_Sec) = Now();
($Waconia_Year, $Waconia_Month, $Waconia_Day) = Today();
# The following scaler, PerlTime, is useful for having a single field to
# characterize time. Note this function expects months to start at zero and
# years to be relative to 1900.
$PerlTime = timelocal(0,($Waconia_Min),($Waconia_Hour),$Waconia_Day,($Waconia_Month-1),($Waconia_Year));
# Create the sql statement for inserting a record of data into the mdb file.
# If there is a funny 24 in the time stamp, convert it to a more normal 0 to 23 convention...
# (apparently this never gets used...)
if ($TheHour == 24) {
$TheDate = UnixDate( Date::Manip::DateCalc( $TheDate, "+ 1 days" ), "%m/%d/%Y");
$TheHour = 0;
}
# If there's no data (i.e. N/A), set it to Null string (for SQL).
if ($WindDir eq "N/A") {$WindDir = "Null"};
if ($WindAvg eq "N/A") {$WindAvg = "Null"};
if ($WindMax eq "N/A") {$WindMax = "Null"};
if ($TempAvg eq "N/A") {$TempAvg = "Null"};
if ($DewPoint eq "N/A") {$DewPoint = "Null"};
if ($BPAvg eq "N/A") {$BPAvg = "Null"};
$DateTimeStamp = $TheDate . " " . $TheHour . ":" . $TheMin;
$LiteralDateTimeStamp = $ParsedDate;
# Note that in the VALUES part of the SQL, strings and dates are quoted, numbers are not.
$sql = "INSERT INTO FifteenMinData (PerlTime, DateTimeStamp, LiteralDateTimeStamp, TimeMDY, TimeHr, TimeMin, StationNumber, StationName, ";
$sql .= "WindDirection, WindSpeed, WindGust, ";
$sql .= "TempAvg, DewPoint, Pressure) ";
$sql .= "VALUES ($PerlTime,'$DateTimeStamp','$LiteralDateTimeStamp','$TheDate',$TheHour,$TheMin,$StationNumber,'$TheCity',";
$sql .= "$WindDir,$WindAvg,$WindMax,";
$sql .= "$TempAvg,$DewPoint,$BPAvg)";
# print "SQL = $sql\n";
# If all numeric values are reasonable, write the record...
# Note: one of these raw values may be an "N/A" string. Those cases always evaluate true and do not block the write.
if ((($TempAvg >= -20) && ($TempAvg <= 120)) &&
(($WindAvg >= 0) && ($WindAvg <= 110)) &&
(($WindMax >= 0) && ($WindMax <= 130)) &&
((($BPAvg >= 28.50) && ($BPAvg <= 31.00)) || ($BPAvg == 0)) &&
(($TheHour >= 0) && ($TheHour <= 24)) &&
(($TheMin >= 0) && ($TheMin <= 59)) ) {
# Print the intended write to the database.
print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";
# Execute the sql
# First write to the database on Waconia
# I have added error checking here.... Note that the Sql
# method returns undefined if it is successful and a non zero integer
# error number if it fails.
if ($TelemOK) {
if ($cnTelem->Sql($sql)) {
# If not the -1605 error message (duplicate records) then log it.
if (!($cnTelem->Error() =~ /-1605/)) {
print OUTPUTFILE localtime(time) . " Waconia SQL failure, Error: " . $cnTelem->Error() . ", SQL=" . $sql . "\n";
print " Waconia Err : " . $cnTelem->Error() . "\n";
} else {
print " Waconia: SQL failed (probably the record already exists).\n";
}
} else {
print "SQL write succeeded, ";
# Prepare to add a new row to the googleSheet_data array.
($TheYear, $TheMonth, $TheDay) = Decode_Date_US($TheDate);
my $pt_time = DateTime->new(
year => $TheYear,
month => $TheMonth,
day => $TheDay,
hour => $TheHour,
minute => $TheMin,
second => 0,
);
my $pt_string = $pt_time->strftime('%m/%d/%Y %H:%M:%S');
print "PST= $pt_string, ";
# This might have to be checked for daylight time in this NOAA data.
my $utc_time = $pt_time->clone->add(hours => 8);
my $utc_string = $utc_time->strftime('%m/%d/%Y %H:%M:%S');
print "UTC= $utc_string\n";
@newRow_googleSheet = ( $shortNames{$TheCity}, $utc_string, cS($TempAvg), cS($DewPoint), cS($WindDir), cS($WindAvg), cS($WindMax), cS($BPAvg) );
push @$googleSheet_data, \@newRow_googleSheet;
}
}
# Write out the TimeMDY date to a special table that is used to quickly
# populate the date combo box
$sql = "INSERT INTO DaysGleaned (TimeMDY) VALUES ('$TheDate')";
if ($TelemOK) {$cnTelem->Sql($sql);}
} else { # Here is the corresponding else of the if-values-are-reasonable block.
print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";
print " Record failed reasonable test\n";
}
}
sub getpage {
my $URL = $_[0];
# The next 9 elements passed in are supplied by the Now array in the main
# body of this script.
my @Now = ($_[1],$_[2],$_[3],$_[4],$_[5],$_[6],$_[7],$_[8],$_[9]);
# Get the page of data. Note the get function from 'LWP::Simple' returns
# undefined on error, so check for errors as follows....
$foundPage = True;
unless (defined ($content = get $URL)) {
print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
print OUTPUTFILE "Could not get $URL\n";
close(OUTPUTFILE);
$foundPage = False;
}
}
sub dir360 {
my $dirString = $_[0];
my $dirAngle;
if ($dirString eq "N" ) { $dirAngle = 360; }
elsif ($dirString eq "NE") { $dirAngle = 45; }
elsif ($dirString eq "E" ) { $dirAngle = 90; }
elsif ($dirString eq "SE") { $dirAngle = 135; }
elsif ($dirString eq "S" ) { $dirAngle = 180; }
elsif ($dirString eq "SW") { $dirAngle = 225; }
elsif ($dirString eq "W" ) { $dirAngle = 270; }
elsif ($dirString eq "NW") { $dirAngle = 315; }
else { $dirAngle = "N/A"; }
return $dirAngle;
}
sub parseandwrite {
$TheCity = $_[0];
# This offset is used to handle city names that have two words, for example "THE DALLES"
my $offset = $_[1];
$StationNumber = $_[2];
# Split the page into lines
@thelines = split (/\n/, $content);
$nlines = @thelines;
# Initialize the sensor values.
$WindDir = "N/A";
$WindAvg = "N/A";
$WindMax = "N/A";
$TempAvg = "N/A";
$DewPoint = "N/A";
$BPAvg = "N/A";
# Parse it
foreach (@thelines) {
if (/PDT/ or /PST/) {
# Split the line into words.
@thewords = split(" ", $_);
# Set this timezone parameter so the call to ParseDate works.
#
# Note that this does NOT override the TZ value that is set in
# C:\Perl\site\lib\Date\manip.pm
# I only set it here in case it is removed in manip.pm, then I correct
# for the 2 hour difference down below where I shift back to
# standard time.
#
# Changes for MN (from PST8PDT)
# PST8PDT, Pacific time. MST7MDT, Mountain time. CST6CDT, Central time. EST5EDT, Eastern time
#$TZ = "PST8PDT";
$TZ = "CST6CDT";
# Substitute, so can get that silly ":" in there. First grab the
# first 4 characters. Note there is a space at the beginning of the
# line so must go 1 to 5 not 0 to 4
$thetime = substr($_,1,5);
#print " thetime = $thetime \n";
# If 1000, 10:00, do a special form of the substitution.
if ($thetime =~ /000/) {
$_ =~ s/000/0:00/;
} else {
$_ =~ s/00/:00/;
}
#print " Raw stuff = $_ \n";
# Parse the date to get around the problem of their am/pm format.
#print "TZ = $ENV{'TZ'}\n";
$ParsedDate = ParseDate($_);
#print "ParseDate = $ParsedDate\n";
# Subtract an hour to get back to standard time.
# And subtract two hours to get back to Pacific time because DateCalc
# tries to present the result in the local time zone.
if ($thewords[2] eq "PDT") {
$PSTDate = DateCalc($ParsedDate,"- 3hours",\$err);
} else {
$PSTDate = DateCalc($ParsedDate,"- 2hours",\$err);
}
#print "PSTDate = $PSTDate \n";
# Use this formating function to return a date string
$TheDate = UnixDate($PSTDate, "%m/%d/%y");
# Finally get the hour and min with a simple substring call
$TheHour = substr($PSTDate,8,2) + 0;
$TheMin = substr($PSTDate,11,2) + 0;
#print " $TheDate, $TheHour, $TheMin \n";
}
# Look for the first occurrence of the city where the NOT AVBL string is
# absent. Not that the LAST statement below will cause a break out of
# the for loop; that is the mechanism I use to prevent the city from
# being parsed twice if it shows more than once on the page.
# Also if you're worried about getting the wrong date associated with a
# city match farther down on the page, don't. If a city match is made in
# a secondary report on the page, the date match will have been updated
# also; it keeps trying to update the date for each line in the for loop.
# So don't be afraid to use urls that have multiple reports on the page.
if ((/$TheCity/) && !(/NOT AVBL/)) {
# The sky cover is two words only for "LGT RAIN"
if (/LGT RAIN/ || /LGT SNOW/) {$offset++}
@thewords = split(" ", $_);
# If there's a letter at the end of the pressure string, remove it.
# Otherwise just take the whole thing.
$PressureStuff = $thewords[6 + $offset];
$PressureStuff =~ /[a-zA-Z]/g;
if (defined ($FirstLetter = pos($PressureStuff))) {
$BPAvg = substr($PressureStuff, 0, $FirstLetter - 1);
} else {
$BPAvg = $PressureStuff;
}
# Not using this anymore, but left here as an example... The
# following use of substr returns all but the last character in the
# target string (If the third parameter is negative, it leaves that
# many off the end of the target string). Target, offset, length.
#$BPAvg = substr($thewords[6 + $offset],0,-1);
$TempAvg = $thewords[2 + $offset];
$DewPoint = $thewords[3 + $offset];
$WindStuff = $thewords[5 + $offset];
# Interpret the windstuff string.
# Check for CALM
if ( ($WindStuff =~ /CALM/g) || ($WindStuff =~ /MISG/g) ) {
$WindAvg = 0;
$WindMax = 0;
$WindDir = "N/A";
} else {
# Split up the windstring into 4 parts.
($WindDir_string,$WindAvg,$Gust,$WindMax) = $WindStuff =~ /(\D+)(\d+)(\D+)?(\d+)?/;
# Translate the winddirection string into a number.
$WindDir = &dir360($WindDir_string);
# If can't find the G (for Gust) in the wind string, just set the
# max to the avg value
if (!defined($Gust)) {
$WindMax = $WindAvg;
}
}
# The following call, to write out to the database, only gets run if
# the city match above was successful.
WriteToMDB;
# Use this "last" statement to exit the for loop. Because there are
# multiple line with these city strings. This way we only use the
# first find with data.
last;
}
}
}
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Main Program
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Open the log file. Note the '$!' variable returns system errors.
@Now = localtime(time);
# Initialize the Google-sheet array that will add a row of data for each successful SQL write.
$googleSheet_data = [];
%shortNames = ('NORTH BEND' => 'NBend', 'MOSES LAKE' => 'MLake',
'ELLENSBURG' => 'EBurg', 'WALLA WALLA' => 'Walla',
'HERMISTON' => 'Hermi', 'PASCO' => 'Pasco',
'THE DALLES' => 'Dalle', 'PORTLAND' => 'PLand');
open(OUTPUTFILE, '>>C:\Users\Jim\Documents\webcontent\waconia\rosielog.txt') or do {
print "Timestamp = @Now[5,4,3,2,1]\n";
print "Could not find rosielog.txt\n";
die "On open (Jdm): $!";
}; # This semicolon is important here...
print OUTPUTFILE localtime(time) . " from getnoaa.pl " . "\n";
# Make database connection objects and point them at the mdb files. Set flags
# to 0 if there is a problem.
$DSN = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=C:/Users/Jim/Documents/webcontent/waconia/data/telem.mdb;UID=admin";
if (!($cnTelem = new Win32::ODBC($DSN))) {
print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
print OUTPUTFILE "Error connecting to $DSN\n";
print OUTPUTFILE "Error: " . Win32::ODBC::Error() . "\n";
$TelemOK = 0;
warn "Error connecting to $DSN\n";
} else {
$TelemOK = 1;
}
# Get the data and send it to the database.
#getpage("http://iwin.nws.noaa.gov/iwin/wa/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=WA&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=WA", @Now);
if ($foundPage) {
parseandwrite("PORTLAND", 1, 51);
parseandwrite("THE DALLES", 2, 52);
parseandwrite("PASCO", 0, 53);
parseandwrite("HERMISTON", 1, 54);
parseandwrite("WALLA WALLA", 1, 55);
parseandwrite("ELLENSBURG", 0, 56);
parseandwrite("MOSES LAKE", 1, 57);
}
# getpage("http://www.wrh.noaa.gov/pendleton/data/text/pdxhwrsch.html", @Now);
# if ($foundPage){
# parseandwrite("BOARDMAN", 1, 58);
# parseandwrite("IRRIGON", 1, 60);
# parseandwrite("UMATILLA", 1, 61);
# parseandwrite("ROOSEVELT", 0, 62);
# parseandwrite("CELILO", 1, 63);
# parseandwrite("RUFUS", 1, 64);
# }
#getpage("http://iwin.nws.noaa.gov/iwin/or/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=OR&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=OR", @Now);
if ($foundPage) {
parseandwrite("NORTH BEND", 1, 65);
}
# The following two lines of code are useful for testing the regular expression.
# It's just a test page on a local server.
#getpage("http://waconia.pnl.gov/TestWaconiaParse.htm", @Now);
#parseandwrite("RUFUS2", 1, 64);
# Close the database and file connections
if ($TelemOK) {$cnTelem->Close();}
close(OUTPUTFILE);
# Send the array of successful writes to the Google sheet.
my $row_count = scalar @$googleSheet_data;
print "row_count = $row_count \n";
if ($row_count > 0) {
# Put the data into a JSON data structure.
my $json_data = {
'sheetName' => 'noaa',
'weatherData' => $googleSheet_data
};
# Convert the JSON data to a string.
my $json_string = encode_json($json_data);
# Write the string to the file.
open(my $fh, '>', 'noaadata.json') or die "Can't open file: $!";
print $fh $json_string;
close $fh;
# Run the Python program that reads the file and Posts the JSON to the spreadsheet. Specify the file as an argument.
my $cmd = "C:\\Users\\Jim\\Documents\\webcontent\\waconia\\pythonPostToSheet.py noaadata.json";
system($cmd);
}