Skip to content

Commit

Permalink
Parse small letters in the end of identifier number, transform draft …
Browse files Browse the repository at this point in the history
…dates to the consistent format
  • Loading branch information
mico authored and ronaldtse committed Mar 21, 2022
1 parent 32695d8 commit b08ec69
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 141 deletions.
1 change: 1 addition & 0 deletions lib/pubid/ieee.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ module Ieee

require_relative "ieee/errors"
require_relative "ieee/parser"
require_relative "ieee/transformer"
require_relative "ieee/identifier"
18 changes: 15 additions & 3 deletions lib/pubid/ieee/identifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,20 @@ class Identifier
:edition, :draft, :rev, :corr, :amd, :redline, :year, :month, :type, :alternative,
:draft_status

def initialize(**opts)
opts.each { |key, value| send("#{key}=", value.is_a?(Enumerable) && value || value.to_s) }
# Builds an identifier from the four top-level groups of the parse result.
#
# @param organizations [Hash, Array] publisher/copublisher data
# @param type_status [Hash, Array] type and draft-status data
# @param number stored as-is in @number
# @param parameters [Hash, Array] remaining attributes
#
# Hash groups are assigned directly; Array groups are first combined with
# Identifier.merge_parameters. Any other value (e.g. nil) is ignored.
def initialize(organizations:, type_status:, number:, parameters:)
  @number = number

  [organizations, type_status, parameters].each do |group|
    if group.is_a?(Hash)
      set_values(group)
    elsif group.is_a?(Array)
      set_values(Identifier.merge_parameters(group))
    end
  end
end

# Assigns every key/value pair of +hash+ through the matching "<key>=" writer.
# Enumerable values (hashes, arrays) are passed through untouched; anything
# else is converted with #to_s before assignment.
#
# @param hash [Hash] attribute name => value
# @return [Hash] the hash that was passed in
def set_values(hash)
  hash.each do |attribute, raw|
    normalized = raw.is_a?(Enumerable) ? raw : raw.to_s
    send("#{attribute}=", normalized)
  end
end

def self.update_old_code(code)
Expand All @@ -37,7 +49,7 @@ def self.merge_parameters(params)
end

def self.parse(code)
new(**merge_parameters(Parser.new.parse(update_old_code(code))).to_h)
new(**merge_parameters(Transformer.new.apply(Parser.new.parse(update_old_code(code)))).to_h)

rescue Parslet::ParseFailed => failure
raise Pubid::Ieee::Errors::ParseError, "#{failure.message}\ncause: #{failure.parse_failure_cause.ascii_tree}"
Expand Down
83 changes: 44 additions & 39 deletions lib/pubid/ieee/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Parser < Parslet::Parser
end

rule(:number) do
(digits | match("[A-Z]")).repeat(1).as(:number)
((digits | match("[A-Z]")).repeat(1) >> match("[a-z]").maybe).as(:number)
end

rule(:part) do
Expand Down Expand Up @@ -57,50 +57,55 @@ class Parser < Parslet::Parser
((str(", ") | str(" ")) >> match("[A-Za-z]").repeat(1).as(:month) >>
(((str(" ") >> digits.as(:day)).maybe >>
str(", ") >> match('\d').repeat(4, 4).as(:year)) |
((str(" ") | str(", ")) >> match('\d').repeat(4, 4).as(:year)))
((str(" ") | str(", ")) >> match('\d').repeat(2, 4).as(:year)))
).maybe
end

# Grammar for the suffix that can follow the base number: combinations of
# part, subpart, year and edition. The alternatives are joined with `|`, which
# in Parslet is an ordered choice, so the longer/more specific patterns are
# listed first and the bare `part` fallback comes last. Do not reorder.
# Each alternative is preceded by example identifiers it is meant to match.
rule(:part_subpart_year) do
# 802.15.22.3-2020
# 1073.1.1.1-2004
(part >> subpart.repeat(2, 2).as(:subpart) >> year) |
# C57.12.00-1993
(part >> subpart.as(:subpart) >> year) |
# N42.44-2008
# 1244-5.2000
# 11073-40102-2020
# C37.0781-1972
(part >> year) |
# C57.19.101
(part >> subpart.as(:subpart)) |
# IEEE P11073-10101
# IEEE P11073-10420/D4D5
# trick to avoid being partially parsed by year
(str("-") >> match('[\dA-Z]').repeat(5).as(:part)) |
# 581.1978
year |
# IEC 62525-Edition 1.0 - 2007
edition.as(:edition) |
# 61691-6
part

end

# Dual PubIDs: a space followed by one or more alternative identifiers, each
# captured as :alternative. Three spellings are accepted, tried in this order:
#   1. a parenthesized list, entries optionally separated by ", "
#   2. "and " followed by a single identifier
#   3. a single bare identifier
rule(:dual_pubids) do
  alternative = identifier.as(:alternative)
  parenthesized = str("(") >> (alternative >> str(", ").maybe).repeat(1) >> str(")")
  conjoined = str("and ") >> alternative

  str(" ") >> (parenthesized | conjoined | alternative)
end

# Optional "No"/"no" marker (followed by "." or a space) and then one
# optional extra space.
rule(:number_prefix) do
  marker = str("No") | str("no")
  separator = str(".") | str(" ")

  (marker >> separator).maybe >> str(" ").maybe
end

rule(:identifier) do
organization.as(:publisher) >> ((str("/ ") | str("/")) >> organization.as(:copublisher)).repeat >>
draft_status.maybe >>
str(" ") >> (type.as(:type) >> str(" ")).maybe >> (
(str("No") | str("no")) >> (str(".") | str(" "))
).maybe >> str(" ").maybe >>
number >>
# part/subpart/year patterns:
(
# 802.15.22.3-2020
# 1073.1.1.1-2004
(part >> subpart.repeat(2, 2).as(:subpart) >> year) |
# C57.12.00-1993
(part >> subpart.as(:subpart) >> year) |
# N42.44-2008
# 1244-5.2000
# 11073-40102-2020
# C37.0781-1972
(part >> year) |
# C57.19.101
(part >> subpart.as(:subpart)) |
# IEEE P11073-10101
# IEEE P11073-10420/D4D5
# trick to avoid being partially parsed by year
(str("-") >> match('[\dA-Z]').repeat(5).as(:part)) |
# 581.1978
year |
# IEC 62525-Edition 1.0 - 2007
edition.as(:edition) |
# 61691-6
part
).maybe >>
draft.as(:draft).maybe >>
(organization.as(:publisher) >> ((str("/ ") | str("/")) >> organization.as(:copublisher)).repeat)
.as(:organizations) >>
(draft_status.maybe >> (str(" ") >> type.as(:type) >> str(" ")).maybe).as(:type_status) >>
str(" ").maybe >> number_prefix >> number >> (part_subpart_year.maybe >> draft.as(:draft).maybe >>
edition.as(:edition).maybe >>
# dual-PubIDs
(str(" ") >>
((str("(") >> (identifier.as(:alternative) >> str(", ").maybe).repeat(1) >>
str(")")) | (str("and ") >> identifier.as(:alternative)) |
identifier.as(:alternative))
).maybe
dual_pubids.maybe).as(:parameters)
end

rule(:root) { identifier }
Expand Down
19 changes: 19 additions & 0 deletions lib/pubid/ieee/transformer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require "date"

module Pubid::Ieee
  # Post-processes the tree produced by the parser so that draft dates have a
  # consistent shape.
  class Transformer < Parslet::Transform
    # Normalizes the date carried by a :draft subtree.
    #
    # When the draft has a :month:
    #   * a two-digit :year is expanded to four digits
    #     (00..25 => 20xx, 26..99 => 19xx);
    #   * :month is rewritten as the full English month name
    #     (e.g. "Feb" => "February") via Date.parse / strftime("%B").
    # A draft without :month is returned unchanged.
    #
    # The matched subtree is copied before being modified so the parser's
    # original output is not mutated.
    #
    # Note: Date.parse raises if the month text is not recognizable.
    rule(draft: subtree(:draft)) do
      result = draft.dup

      if draft[:month]
        year = draft[:year]

        # Guard against a missing year instead of failing on nil.length.
        if year && year.length == 2
          result[:year] = case year.to_i
                          when 0..25 then "20#{year}"
                          when 26..99 then "19#{year}"
                          end
        end

        result[:month] = Date.parse(draft[:month]).strftime("%B")
      end

      { draft: result }
    end
  end
end
98 changes: 98 additions & 0 deletions spec/fixtures/pubid-parsed.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5570,3 +5570,101 @@ IEEE Unapproved Draft Std PC57.12.90_D8, Sep 2008
IEEE/IEC P62582-3_D1, April 2011
IEEE/IEC P62659_D2
IEEE/IEC P82079_D2, March 2017
IEEE Approved Draft Std P1115a/D4, Feb 2007
IEEE Approved Draft Std P1484.20.1/D7, May 07
IEEE Approved Draft Std P1516.4/Dx.1, Jun 07
IEEE Approved Draft Std P1542/D8, Aug 07
IEEE Approved Draft Std P1613a/D3.2 Nov 2007
IEEE Approved Draft Std P1617/D7.0, Jun 07
IEEE Approved Draft Std P1636.1/D3, May 07
IEEE Approved Draft Std P1661/D11, Jul 07
IEEE Approved Draft Std P495/D13, Aug 07
IEEE Approved Draft Std P532/D9.6, Apr 07
IEEE Approved Draft Std P741/D2, Apr 07
IEEE Approved Draft Std P99/D2.0, May 07
IEEE Approved Draft Std PC37.20.7/D14, Dec 06
IEEE Approved Draft Std PC37.230/D6.1, Jul 07
IEEE Approved Draft Std PC37.235/D4, Mar 07
IEEE Approved Draft Std PC57.12.36/D11, May 07
IEEE Approved Draft Std PC57.129/D10, Jul 07
IEEE P1048a/D002, February 2020
IEEE P1048a/D003, December 2020
IEEE P1538a/D3, February 2015
IEEE P1538a/D4, February 2015
IEEE P1538a/D5, July 2015
IEEE P1547a.1/D4, August 2014
IEEE P1547a/D1.3, December 2019
IEEE P1547a/D1.4, January 2020
IEEE P1547a/D2, June 2013
IEEE P1547a/D3, December 2013
IEEE P1588b/D1.0, August 2021
IEEE P1588b/D1.1, December 2021
IEEE P1657a/D4, January 2015
IEEE P1657a/D5, January 2015
IEEE P1708a/D2, February 2019
IEEE P1708a/D3, April 2019
IEEE P1801a/D01, May 2014
IEEE P1801a/D03, May 2014
IEEE P1857a/D1, December 2013
IEEE P1857a/D2, January 2014
IEEE P1901a/D3, December 2018
IEEE P1901b/D1.1, August 2021
IEEE P269a/D5.0, November 2011
IEEE P269a/D5.2, January 2012
IEEE P802.15.4q/D5.0, May, 20
IEEE P802c/D2.2, April 2017
IEEE P802d/D1.0 August, 2016
IEEE P802d/D1.1 September 17, 2016
IEEE P802d/D1.2, November 2016
IEEE P835a/D2, October 2012
IEEE Std 1012a-1998
IEEE Std 1394a-2000
IEEE Std 1613a-2008
IEEE Std 1616a
IEEE Std 1666 IEC61691-7 Edition 1.0 2009-12
IEEE Std 4a-2001
IEEE Std 524a-1993
IEEE Std 802a-2003
IEEE Std P1076c/D3.1
IEEE Std P1115a/D3
IEEE Std P1394c/D1.1
IEEE Std P1394c/D1.2
IEEE Std P1500/D11, Jan 06
IEEE Std P1528a/D2.0
IEEE Std P1528a/D2.1
IEEE Std P269a_D5
IEEE Std P269a_D8
IEEE Std P802a/D4
IEEE Std P802b/D3
IEEE Unapproved Draft Std P1003.1_D3, Jun 07
IEEE Unapproved Draft Std P1028/D5.1, Sep, 07
IEEE Unapproved Draft Std P1189/D15, Jun 07
IEEE Unapproved Draft Std P1407_D7, Mar 07
IEEE Unapproved Draft Std P1561/D16, Jul 07
IEEE Unapproved Draft Std P1610_D13, Aug 07
IEEE Unapproved Draft Std P1613a/D3.2, Nov 2007
IEEE Unapproved Draft Std P1616a/D3, Oct 2009
IEEE Unapproved Draft Std P1616a/D4, Jan 2010
IEEE Unapproved Draft Std P1619/D17, Jul 07
IEEE Unapproved Draft Std P1631/D2, Sep 07
IEEE Unapproved Draft Std P1636.1/D3, Jan 09
IEEE Unapproved Draft Std P1686_D4, Jun 07
IEEE Unapproved Draft Std P1900.2/D2.22, Jun 07
IEEE Unapproved Draft Std P1900_D01, Jun 07
IEEE Unapproved Draft Std P2600.1/D41b, Jan 09
IEEE Unapproved Draft Std P2600.3/D41b, Jan 09
IEEE Unapproved Draft Std P2600.4/D41b, Jan 09
IEEE Unapproved Draft Std P532/D9.6, May 07
IEEE Unapproved Draft Std P605a/D2, Sept 2009
IEEE Unapproved Draft Std P605a/D3, Nov 2009
IEEE Unapproved Draft Std P841/D5, Jan 09
IEEE Unapproved Draft Std P841_D3, Jul 07
IEEE Unapproved Draft Std PC37.1/D3, Jun 07
IEEE Unapproved Draft Std PC37.1/D5, Sep 07
IEEE Unapproved Draft Std PC37.12.1_D4.0, Sep 07
IEEE Unapproved Draft Std PC37.16/D13, Apr 08
IEEE Unapproved Draft Std PC37.43/D9, Oct 06
IEEE Unapproved Draft Std PC57.12.38/D6.3, Aug 08
IEEE Unapproved Draft Std PC57.13/D09, Aug 07
IEEEPC37.74/D12, May 2014
ISO/IEC/IEEE P42010_D8, June 2010
Loading

0 comments on commit b08ec69

Please sign in to comment.