-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclustering-percentDiff-compare.sql
168 lines (148 loc) · 5.18 KB
/
clustering-percentDiff-compare.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
USE MDM
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#data_kwh') IS NOT NULL DROP TABLE #data_kwh
IF OBJECT_ID('tempdb..#LocationReadValueMax') IS NOT NULL DROP TABLE #LocationReadValueMax
IF OBJECT_ID('tempdb..#data_summary') IS NOT NULL DROP TABLE #data_summary
IF OBJECT_ID('tempdb..#data_total') IS NOT NULL DROP TABLE #data_total
IF OBJECT_ID('tempdb..#model') IS NOT NULL DROP TABLE #model
IF OBJECT_ID('tempdb..#meters') IS NOT NULL DROP TABLE #meters
IF OBJECT_ID('tempdb..#match_results') IS NOT NULL DROP TABLE #match_results
SELECT
h.ReadLogDate
, d.ReadDate
, d.Readvalue
, h.Uom
, d.ReadQualityCode
, d.RecordInterval
, d.VeeFlag
, d.ChannelStatus
, s.SubstationName SubstationName
, h.MeterIdentifier
, l.LocationNumber
INTO #data_kwh
FROM mdm.dbo.meterreadintervalheader h
INNER JOIN mdm.dbo.meter m
ON m.meteridentifier = h.meteridentifier
INNER JOIN mdm.dbo.MeterReadIntervalDetail d
ON d.meterreadintervalheaderid = h.meterreadintervalheaderid
INNER JOIN mdm.dbo.electricmeters em
ON em.meteridentifier= m.meteridentifier
INNER JOIN mdm.dbo.location l
ON l.locationid = m.locationid
INNER JOIN mdm.dbo.substation s
ON s.substationid = l.substationid
WHERE
h.readlogdate >= '2018-01-01'
AND h.readlogdate < '2018-02-01'
AND h.uom = 'kWh'
--AND d.ReadQualityCode = '3' --Good Reads Only for now
-- get the max for each location
SELECT
max(Readvalue) [max_per_group]
, LocationNumber
INTO #LocationReadValueMax
FROM #data_kwh
GROUP BY LocationNumber
-- Get a Summary of the data
-- SELECT COUNT(*) FROM #data_summary
SELECT
convert(varchar(20), ReadDate, 8) AS 'hhmm'
, Uom
, #data_kwh.LocationNumber
, MeterIdentifier
, avg(Readvalue) mean_value
--, median(Readvalue) median
, min(Readvalue) min_value
, max(Readvalue) max_value
, sum(Readvalue) sum_value
, AVG(max_per_group) scaled_mean
, STDEV(ReadValue) std
, NULL AS scaled_value_max
, NULL AS scaled_value_sd
, count(*) n
INTO #data_summary
FROM #data_kwh
INNER JOIN #LocationReadValueMax
ON #LocationReadValueMax.LocationNumber = #data_kwh.LocationNumber
GROUP BY
convert(varchar(20), ReadDate, 8)
, Uom
, #data_kwh.LocationNumber
, MeterIdentifier
SELECT
LocationNumber
, MeterIdentifier
, count(*) intervals
, 2976 expected
INTO #data_total
FROM
#data_summary
GROUP BY
LocationNumber,
MeterIdentifier
-- Remove Bad Locations, without expected interval counts
DELETE FROM #data_summary
WHERE LocationNumber
IN (SELECT LocationNumber FROM #data_total WHERE intervals < 2976) --2976
-- create scaled values b/c this is where the $ is ...
UPDATE #data_summary
SET
scaled_value_max = mean_value / nullif(max_value, 0)
--,scaled_value_sd = mean_value / std
-- Get all the LocationNumbers, iterate over them
DECLARE @Model_LocationNumber VARCHAR(50)
DECLARE @LocationNumber VARCHAR(50)
DECLARE @percent_diff DECIMAL(18,5)
DECLARE cursor_meters CURSOR FOR
SELECT LocationNumber FROM #data_summary GROUP BY LocationNumber
-- Get the Model another cursor here... dang... b/c we can have multiple models
SELECT @Model_LocationNumber = 4191826
SELECT * INTO #model FROM #data_summary WHERE LocationNumber = @Model_LocationNumber
OPEN cursor_meters
FETCH NEXT FROM cursor_meters INTO @LocationNumber
WHILE @@FETCH_STATUS = 0
BEGIN
-- Compare model and this location
SELECT #model.hhmm model_hhmm
, t.hhmm sample_hhmm
, #model.mean_value model_mean_value
, t.mean_value sample_mean_value
, (#model.mean_value - t.mean_value) / NULLIF( ((#model.mean_value + t.mean_value)/2),0) AS percent_diff
INTO #match_results
FROM #model
INNER JOIN
(SELECT hhmm, mean_value, LocationNumber
FROM #data_summary
WHERE LocationNumber = @LocationNumber
AND LocationNumber != @Model_LocationNumber
) AS t
ON t.hhmm = #model.hhmm
-- Get the total percent_diff
SElECT @percent_diff = SUM(percent_diff) FROM #match_results
-- if percent_diff is < X (idk, need to test this out) save result as matching somewhere
if (ABS(@percent_diff) < 0.10)
BEGIN
PRINT ('MATCH FOUND -> Model LN : ' + @Model_LocationNumber + '| LocationNumber : ' + @LocationNumber + ' | percent_diff : ' + CAST(@percent_diff AS VARCHAR))
END
IF OBJECT_ID('tempdb..#match_results') IS NOT NULL DROP TABLE #match_results
FETCH NEXT FROM cursor_meters INTO @LocationNumber
END
CLOSE cursor_meters
DEALLOCATE cursor_meters
SET NOCOUNT OFF
/*review
MATCH FOUND -> Model LN : 118419| LocationNumber : 118419 | percent_diff : 0.00000
SELECT model.hhmm model_hhmm
, t.hhmm sample_hhmm
, model.mean_value model_mean_value
, t.mean_value sample_mean_value
, (model.mean_value - t.mean_value) / NULLIF( ((model.mean_value + t.mean_value)/2),0) AS percent_diff
FROM (SELECT hhmm, mean_value, LocationNumber
FROM #data_summary
WHERE LocationNumber = 118419) AS model
INNER JOIN
(SELECT hhmm, mean_value, LocationNumber
FROM #data_summary
WHERE LocationNumber = 118419) AS t
ON t.hhmm = model.hhmm
*/