Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve CJSON readability while maintaining size #5

Merged
merged 1 commit into from
Jan 6, 2025

Conversation

matterhorn103
Copy link
Contributor

Some CJSON files are formatted with consistent indentation (generally two spaces), but this makes them very long and hard to scan, because each array is spread over as many lines as it has items. It is also inefficient in terms of file size. Even a molecule as simple as acetone, for example, looks like this:

{
  "chemicalJson": 1,
  "atoms": {
    "coords": {
      "3d": [
        1.802505,
        0.08453,
        1.255788,
        1.247226,
        0.06357,
        0.324074,
        1.474887,
        0.957917,
        -0.257488,
        1.572715,
        -0.78735,
        -0.275927,
        -0.233666,
        -0.02254,
        0.59907,
        -0.674746,
        -0.064218,
        1.730045,
        -1.135923,
        -0.052969,
        -0.60973,
        -0.987846,
        0.841217,
        -1.21666,
        -0.890646,
        -0.904126,
        -1.24637,
        -2.174517,
        -0.116041,
        -0.302793
      ]
    },
    "elements": {
      "number": [
        1,
        6,
        1,
        1,
        6,
        8,
        6,
        1,
        1,
        1
      ]
    },
    "formalCharges": [
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0
    ]
  },
  "partialCharges": {
    "Gasteiger": [
      0.192789,
      -0.474681,
      0.168066,
      0.16845,
      0.226622,
      -0.335922,
      -0.474645,
      0.168344,
      0.168176,
      0.1928
    ]
  },
  "bonds": {
    "connections": {
      "index": [
        8,
        6,
        7,
        6,
        6,
        9,
        6,
        4,
        3,
        1,
        2,
        1,
        1,
        4,
        1,
        0,
        4,
        5
      ]
    },
    "order": [
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      2
    ]
  },
  "vibrations": {
    "frequencies": [
      136.5,
      383.94,
      485.15,
      529.21,
      776.54,
      877.06,
      877.98,
      1080.88,
      1114.37,
      1224.41,
      1381.82,
      1383.69,
      1465.8,
      1466.57,
      1475.84,
      1491.89,
      1756.48,
      2994.74,
      3002.26,
      3052.84,
      3060.64,
      3119.3,
      3120.51
    ],
    "modes": [
      1,
      2,
      3,
      4,
      5,
      6,
      7,
      8,
      9,
      10,
      11,
      12,
      13,
      14,
      15,
      16,
      17,
      18,
      19,
      20,
      21,
      22,
      23
    ],
    "intensities": [
      1e-06,
      0.000305,
      7.2e-05,
      0.002438,
      0.000418,
      0.002123,
      1.7e-05,
      1e-06,
      0.000641,
      0.014832,
      0.012011,
      0.004519,
      0.000159,
      7e-06,
      0.006705,
      0.004643,
      0.033536,
      0.000533,
      0.002095,
      0.0,
      0.005232,
      0.003321,
      0.002348
    ],
    "eigenVectors": [
      [
        0.022756,
        -0.398597,
        -0.004629,
        0.002212,
        -0.037669,
        -0.00048,
        0.016237,
        0.194007,
        0.355946,
        -0.038265,
        0.198553,
        -0.352348,
        0.001524,
        -0.024424,
        -0.000378,
        -0.004332,
        0.075315,
        0.001022,
        0.002043,
        -0.037378,
        -0.000516,
        -0.274747,
        0.180823,
        0.248296,
        0.251272,
        0.218179,
        -0.241257,
        0.022646,
        -0.403115,
        -0.005863
      ],
      [
        0.196274,
        0.016017,
        -0.332489,
        -0.108953,
        -0.003664,
        -0.149005,
        -0.297801,
        -0.020218,
        -0.247853,
        -0.296686,
        -0.005798,
        -0.247293,
        -0.04696,
        -0.00434,
        0.120408,
        -0.047402,
        -0.004517,
        0.121398,
        0.180873,
        0.010465,
        -0.0354,
        0.385442,
        0.014304,
        0.020681,
        0.386819,
        0.028724,
        0.019991,
        0.080905,
        0.009336,
        -0.377305
      ],
      [
        -0.018647,
        0.317255,
        0.003943,
        -0.000689,
        0.020864,
        -0.000121,
        -0.399947,
        0.128972,
        0.002877,
        0.386284,
        0.173139,
        0.000621,
        0.015288,
        -0.273835,
        -0.004082,
        -0.006098,
        0.096295,
        0.001268,
        -0.000972,
        0.02074,
        0.000876,
        0.280736,
        0.162446,
        0.271665,
        -0.296394,
        0.139308,
        -0.265078,
        -0.017612,
        0.317669,
        0.005487
      ],
      [
        -0.476322,
        -0.030174,
        0.286719,
        -0.177818,
        -0.011501,
        0.109797,
        0.006224,
        0.006746,
        0.210765,
        0.007888,
        -0.010109,
        0.209386,
        -0.12549,
        -0.007453,
        -0.049187,
        0.290247,
        0.014831,
        0.11378,
        -0.056213,
        9e-06,
        -0.201586,
        0.148453,
        0.003324,
        -0.147621,
        0.146694,
        0.020204,
        -0.151149,
        -0.155799,
        0.000358,
        -0.534192
      ],
      [
        -0.498461,
        -0.030497,
        0.139272,
        -0.301915,
        -0.016764,
        0.030059,
        -0.22384,
        -0.012159,
        0.072338,
        -0.227633,
        -0.015437,
        0.072766,
        0.053039,
        0.004943,
        -0.136267,
        0.063488,
        0.006092,
        -0.162999,
        0.201476,
        0.007276,
        0.227712,
        0.11869,
        0.004869,
        0.208208,
        0.116707,
        0.00196,
        0.206313,
        0.271651,
        0.008722,
        0.440425
      ],
      [
        0.349312,
        -0.002142,
        -0.145296,
        -0.075322,
        0.004343,
        0.112729,
        -0.34087,
        -0.072433,
        -0.112405,
        -0.433834,
        0.007799,
        -0.093556,
        0.111696,
        0.005051,
        0.043671,
        0.017917,
        0.000912,
        0.00693,
        0.021988,
        -0.006527,
        -0.133409,
        -0.32921,
        0.038756,
        -0.149392,
        -0.381422,
        -0.048308,
        -0.22649,
        0.156216,
        0.027682,
        0.343192
      ],
      [
        0.012054,
        0.23351,
        -0.008117,
        -0.001084,
        -0.096396,
        0.007502,
        -0.478265,
        0.0842,
        0.078744,
        0.406068,
        0.142155,
        -0.091585,
        0.008944,
        0.000193,
        0.003411,
        0.001496,
        0.000121,
        0.00051,
        -0.003222,
        0.096352,
        -0.008999,
        -0.290094,
        -0.121365,
        -0.380809,
        0.244988,
        -0.110753,
        0.346246,
        0.026235,
        -0.231435,
        0.024636
      ],
      [
        -0.483762,
        -0.030257,
        0.156337,
        -0.019856,
        0.001034,
        -0.129631,
        0.295967,
        0.081802,
        0.12599,
        0.298282,
        -0.052167,
        0.125694,
        -0.011797,
        -0.001229,
        0.030529,
        -0.037445,
        -0.003524,
        0.095909,
        0.102321,
        0.006876,
        -0.081641,
        -0.307058,
        0.051676,
        -0.10825,
        -0.30071,
        -0.081672,
        -0.111502,
        0.249548,
        0.006952,
        0.443138
      ],
      [
        0.01834,
        -0.295018,
        -0.003678,
        -0.006897,
        0.128697,
        0.001892,
        0.397551,
        -0.083224,
        -0.139584,
        -0.38766,
        -0.129527,
        0.136054,
        0.012344,
        -0.227384,
        -0.003618,
        -0.002269,
        0.041548,
        0.000616,
        -0.007179,
        0.128472,
        0.002246,
        -0.188724,
        -0.110732,
        -0.369328,
        0.200924,
        -0.101438,
        0.365914,
        0.016228,
        -0.294425,
        -0.005356
      ],
      [
        0.548308,
        0.033966,
        -0.195033,
        0.084884,
        0.003449,
        0.083497,
        0.055488,
        -0.117176,
        -0.141968,
        0.044071,
        0.126308,
        -0.140246,
        -0.346301,
        -0.016973,
        -0.135684,
        0.056481,
        0.002751,
        0.022161,
        0.119047,
        0.006605,
        -0.003664,
        -0.065959,
        0.116175,
        0.139682,
        -0.053204,
        -0.127288,
        0.134921,
        0.271268,
        0.006792,
        0.516412
      ],
      [
        0.233378,
        0.013272,
        -0.186574,
        -0.107463,
        -0.005946,
        0.010234,
        0.364901,
        -0.126223,
        -0.004488,
        0.349832,
        0.165798,
        0.000245,
        0.065084,
        0.003317,
        0.026157,
        -0.008875,
        -0.000465,
        -0.003607,
        -0.082367,
        -0.002789,
        -0.096984,
        0.299955,
        0.195196,
        0.306013,
        0.318188,
        -0.171507,
        0.299991,
        0.061036,
        -0.004592,
        0.364068
      ],
      [
        -0.313784,
        -0.017689,
        0.217713,
        0.104539,
        0.005903,
        -0.025915,
        -0.423999,
        0.177505,
        0.052703,
        -0.403042,
        -0.224181,
        0.046327,
        -0.007971,
        -0.000522,
        0.00342,
        0.001261,
        8.5e-05,
        -0.000933,
        -0.045521,
        -0.001117,
        -0.074496,
        0.210519,
        0.179623,
        0.272247,
        0.228051,
        -0.164797,
        0.266797,
        0.073988,
        -0.002618,
        0.314733
      ],
      [
        -0.248064,
        -0.083496,
        0.179555,
        0.015343,
        0.00637,
        0.032471,
        0.011567,
        -0.258917,
        -0.389149,
        0.046549,
        0.258587,
        -0.32599,
        0.016093,
        0.000765,
        0.006398,
        -0.004889,
        -0.000259,
        -0.002056,
        0.03409,
        -0.004028,
        -0.013245,
        -0.28346,
        0.244741,
        0.287408,
        -0.162237,
        -0.267594,
        0.280351,
        -0.067555,
        0.073768,
        -0.304872
      ],
      [
        -0.067114,
        0.510277,
        0.03306,
        0.004718,
        -0.043871,
        0.004614,
        0.24914,
        0.013756,
        0.163641,
        -0.243676,
        0.071848,
        -0.274356,
        0.002456,
        0.000325,
        0.000473,
        -0.000957,
        -8e-05,
        0.000412,
        0.001552,
        0.044595,
        -0.000693,
        0.306059,
        0.004185,
        0.037753,
        -0.354359,
        -0.093002,
        0.026644,
        0.021149,
        -0.518293,
        -0.045648
      ],
      [
        0.260613,
        0.028525,
        -0.189127,
        -0.023603,
        -0.001959,
        -0.033108,
        -0.00786,
        0.247959,
        0.37345,
        0.008632,
        -0.253782,
        0.358035,
        -0.009639,
        -0.00111,
        0.024651,
        0.013863,
        0.001324,
        -0.03551,
        0.039723,
        0.003095,
        -0.008104,
        -0.256662,
        0.231159,
        0.271742,
        -0.238515,
        -0.26923,
        0.263554,
        -0.06347,
        -0.005962,
        -0.3167
      ],
      [
        0.02732,
        -0.503933,
        -0.004824,
        -0.001529,
        0.033096,
        0.000265,
        -0.278006,
        -0.047963,
        -0.213773,
        0.280128,
        -0.022323,
        0.213457,
        -0.002085,
        0.033356,
        0.000557,
        0.000229,
        -0.002442,
        -0.000205,
        -0.001346,
        0.032571,
        0.000689,
        0.344814,
        -0.014413,
        0.033611,
        -0.345849,
        -0.055123,
        -0.031571,
        0.02706,
        -0.497422,
        -0.011647
      ],
      [
        -0.243029,
        -0.015413,
        0.146133,
        0.017338,
        0.001597,
        -0.034753,
        0.141987,
        -0.108124,
        -0.13228,
        0.127118,
        0.125745,
        -0.129654,
        -0.244286,
        -0.023175,
        0.626421,
        0.156923,
        0.014852,
        -0.402382,
        0.010765,
        0.001148,
        -0.037351,
        -0.003716,
        -0.114262,
        -0.189468,
        -0.016759,
        0.119611,
        -0.185873,
        0.079683,
        0.000158,
        0.2727
      ],
      [
        -0.129992,
        -0.004831,
        -0.23421,
        0.030252,
        0.001661,
        -0.020953,
        -0.089179,
        -0.380433,
        0.238575,
        -0.131994,
        0.366,
        0.248641,
        -0.001901,
        -9.9e-05,
        -0.000742,
        0.000266,
        1.4e-05,
        0.000101,
        0.00813,
        5.1e-05,
        0.036216,
        0.063105,
        0.383908,
        -0.248672,
        0.105684,
        -0.368443,
        -0.263388,
        -0.256541,
        -0.015635,
        0.08443
      ],
      [
        0.137464,
        0.005143,
        0.242566,
        -0.030797,
        -0.00168,
        0.020636,
        0.088352,
        0.381125,
        -0.239902,
        0.131275,
        -0.366799,
        -0.25004,
        -0.00032,
        -2.5e-05,
        0.000843,
        0.000346,
        3.2e-05,
        -0.000892,
        0.008606,
        7.2e-05,
        0.03577,
        0.063585,
        0.378623,
        -0.24516,
        0.10549,
        -0.363122,
        -0.259586,
        -0.263422,
        -0.01603,
        0.084123
      ],
      [
        -0.00225,
        0.015119,
        -0.002221,
        -0.003407,
        0.064135,
        0.000736,
        -0.105943,
        -0.405293,
        0.273762,
        0.149342,
        -0.382645,
        -0.28043,
        -7e-06,
        -0.0,
        -3e-06,
        -1e-06,
        -0.0,
        1e-06,
        0.003707,
        -0.063847,
        -0.001032,
        0.071628,
        0.403343,
        -0.283035,
        -0.114852,
        0.381242,
        0.295054,
        -0.001409,
        -0.015189,
        0.000429
      ],
      [
        -0.002191,
        0.012816,
        -0.002365,
        -0.003392,
        0.063928,
        0.000754,
        -0.105407,
        -0.403473,
        0.272501,
        0.148733,
        -0.381208,
        -0.279259,
        -9.2e-05,
        0.001439,
        2.3e-05,
        -1.1e-05,
        0.000198,
        3e-06,
        -0.00376,
        0.064192,
        0.001056,
        -0.071941,
        -0.405068,
        0.284141,
        0.115391,
        -0.383021,
        -0.296342,
        0.00191,
        0.013037,
        -0.000561
      ],
      [
        0.365531,
        0.013775,
        0.617789,
        -0.021935,
        -0.000328,
        -0.068003,
        -0.04347,
        -0.170503,
        0.098247,
        -0.061408,
        0.160508,
        0.100279,
        -0.000658,
        -3e-05,
        -0.000683,
        0.000301,
        1.5e-05,
        0.000279,
        -0.050627,
        -0.003436,
        0.028616,
        0.016073,
        0.137715,
        -0.086155,
        0.030639,
        -0.130508,
        -0.089509,
        0.560311,
        0.034,
        -0.167624
      ],
      [
        -0.296552,
        -0.011183,
        -0.500391,
        0.018052,
        0.000246,
        0.055379,
        0.037985,
        0.144136,
        -0.08304,
        0.052984,
        -0.135246,
        -0.084546,
        -0.001503,
        -0.000141,
        0.003611,
        0.000578,
        5.2e-05,
        -0.001372,
        -0.062213,
        -0.004237,
        0.034918,
        0.019339,
        0.175032,
        -0.110577,
        0.037781,
        -0.165787,
        -0.114727,
        0.683407,
        0.041462,
        -0.203925
      ]
    ]
  },
  "properties": {
    "name": "acetone.out.gz",
    "totalCharge": 0,
    "totalSpinMultiplicity": 1
  }
}

Some CJSON files have no indentation at all, with all the data on a single line. This results in an extremely long line, which some editors complain about and which is very hard to read. It is at least space-efficient. For acetone:

{"chemicalJson": 1, "atoms": {"coords": {"3d": [1.802505, 0.08453, 1.255788, 1.247226, 0.06357, 0.324074, 1.474887, 0.957917, -0.257488, 1.572715, -0.78735, -0.275927, -0.233666, -0.02254, 0.59907, -0.674746, -0.064218, 1.730045, -1.135923, -0.052969, -0.60973, -0.987846, 0.841217, -1.21666, -0.890646, -0.904126, -1.24637, -2.174517, -0.116041, -0.302793]}, "elements": {"number": [1, 6, 1, 1, 6, 8, 6, 1, 1, 1]}, "formalCharges": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}, "partialCharges": {"Gasteiger": [0.192789, -0.474681, 0.168066, 0.16845, 0.226622, -0.335922, -0.474645, 0.168344, 0.168176, 0.1928]}, "bonds": {"connections": {"index": [8, 6, 7, 6, 6, 9, 6, 4, 3, 1, 2, 1, 1, 4, 1, 0, 4, 5]}, "order": [1, 1, 1, 1, 1, 1, 1, 1, 2]}, "vibrations": {"frequencies": [136.5, 383.94, 485.15, 529.21, 776.54, 877.06, 877.98, 1080.88, 1114.37, 1224.41, 1381.82, 1383.69, 1465.8, 1466.57, 1475.84, 1491.89, 1756.48, 2994.74, 3002.26, 3052.84, 3060.64, 3119.3, 3120.51], "modes": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], "intensities": [1e-06, 0.000305, 7.2e-05, 0.002438, 0.000418, 0.002123, 1.7e-05, 1e-06, 0.000641, 0.014832, 0.012011, 0.004519, 0.000159, 7e-06, 0.006705, 0.004643, 0.033536, 0.000533, 0.002095, 0.0, 0.005232, 0.003321, 0.002348], "eigenVectors": [[0.022756, -0.398597, -0.004629, 0.002212, -0.037669, -0.00048, 0.016237, 0.194007, 0.355946, -0.038265, 0.198553, -0.352348, 0.001524, -0.024424, -0.000378, -0.004332, 0.075315, 0.001022, 0.002043, -0.037378, -0.000516, -0.274747, 0.180823, 0.248296, 0.251272, 0.218179, -0.241257, 0.022646, -0.403115, -0.005863], [0.196274, 0.016017, -0.332489, -0.108953, -0.003664, -0.149005, -0.297801, -0.020218, -0.247853, -0.296686, -0.005798, -0.247293, -0.04696, -0.00434, 0.120408, -0.047402, -0.004517, 0.121398, 0.180873, 0.010465, -0.0354, 0.385442, 0.014304, 0.020681, 0.386819, 0.028724, 0.019991, 0.080905, 0.009336, -0.377305], [-0.018647, 0.317255, 0.003943, -0.000689, 0.020864, -0.000121, 
-0.399947, 0.128972, 0.002877, 0.386284, 0.173139, 0.000621, 0.015288, -0.273835, -0.004082, -0.006098, 0.096295, 0.001268, -0.000972, 0.02074, 0.000876, 0.280736, 0.162446, 0.271665, -0.296394, 0.139308, -0.265078, -0.017612, 0.317669, 0.005487], [-0.476322, -0.030174, 0.286719, -0.177818, -0.011501, 0.109797, 0.006224, 0.006746, 0.210765, 0.007888, -0.010109, 0.209386, -0.12549, -0.007453, -0.049187, 0.290247, 0.014831, 0.11378, -0.056213, 9e-06, -0.201586, 0.148453, 0.003324, -0.147621, 0.146694, 0.020204, -0.151149, -0.155799, 0.000358, -0.534192], [-0.498461, -0.030497, 0.139272, -0.301915, -0.016764, 0.030059, -0.22384, -0.012159, 0.072338, -0.227633, -0.015437, 0.072766, 0.053039, 0.004943, -0.136267, 0.063488, 0.006092, -0.162999, 0.201476, 0.007276, 0.227712, 0.11869, 0.004869, 0.208208, 0.116707, 0.00196, 0.206313, 0.271651, 0.008722, 0.440425], [0.349312, -0.002142, -0.145296, -0.075322, 0.004343, 0.112729, -0.34087, -0.072433, -0.112405, -0.433834, 0.007799, -0.093556, 0.111696, 0.005051, 0.043671, 0.017917, 0.000912, 0.00693, 0.021988, -0.006527, -0.133409, -0.32921, 0.038756, -0.149392, -0.381422, -0.048308, -0.22649, 0.156216, 0.027682, 0.343192], [0.012054, 0.23351, -0.008117, -0.001084, -0.096396, 0.007502, -0.478265, 0.0842, 0.078744, 0.406068, 0.142155, -0.091585, 0.008944, 0.000193, 0.003411, 0.001496, 0.000121, 0.00051, -0.003222, 0.096352, -0.008999, -0.290094, -0.121365, -0.380809, 0.244988, -0.110753, 0.346246, 0.026235, -0.231435, 0.024636], [-0.483762, -0.030257, 0.156337, -0.019856, 0.001034, -0.129631, 0.295967, 0.081802, 0.12599, 0.298282, -0.052167, 0.125694, -0.011797, -0.001229, 0.030529, -0.037445, -0.003524, 0.095909, 0.102321, 0.006876, -0.081641, -0.307058, 0.051676, -0.10825, -0.30071, -0.081672, -0.111502, 0.249548, 0.006952, 0.443138], [0.01834, -0.295018, -0.003678, -0.006897, 0.128697, 0.001892, 0.397551, -0.083224, -0.139584, -0.38766, -0.129527, 0.136054, 0.012344, -0.227384, -0.003618, -0.002269, 0.041548, 0.000616, 
-0.007179, 0.128472, 0.002246, -0.188724, -0.110732, -0.369328, 0.200924, -0.101438, 0.365914, 0.016228, -0.294425, -0.005356], [0.548308, 0.033966, -0.195033, 0.084884, 0.003449, 0.083497, 0.055488, -0.117176, -0.141968, 0.044071, 0.126308, -0.140246, -0.346301, -0.016973, -0.135684, 0.056481, 0.002751, 0.022161, 0.119047, 0.006605, -0.003664, -0.065959, 0.116175, 0.139682, -0.053204, -0.127288, 0.134921, 0.271268, 0.006792, 0.516412], [0.233378, 0.013272, -0.186574, -0.107463, -0.005946, 0.010234, 0.364901, -0.126223, -0.004488, 0.349832, 0.165798, 0.000245, 0.065084, 0.003317, 0.026157, -0.008875, -0.000465, -0.003607, -0.082367, -0.002789, -0.096984, 0.299955, 0.195196, 0.306013, 0.318188, -0.171507, 0.299991, 0.061036, -0.004592, 0.364068], [-0.313784, -0.017689, 0.217713, 0.104539, 0.005903, -0.025915, -0.423999, 0.177505, 0.052703, -0.403042, -0.224181, 0.046327, -0.007971, -0.000522, 0.00342, 0.001261, 8.5e-05, -0.000933, -0.045521, -0.001117, -0.074496, 0.210519, 0.179623, 0.272247, 0.228051, -0.164797, 0.266797, 0.073988, -0.002618, 0.314733], [-0.248064, -0.083496, 0.179555, 0.015343, 0.00637, 0.032471, 0.011567, -0.258917, -0.389149, 0.046549, 0.258587, -0.32599, 0.016093, 0.000765, 0.006398, -0.004889, -0.000259, -0.002056, 0.03409, -0.004028, -0.013245, -0.28346, 0.244741, 0.287408, -0.162237, -0.267594, 0.280351, -0.067555, 0.073768, -0.304872], [-0.067114, 0.510277, 0.03306, 0.004718, -0.043871, 0.004614, 0.24914, 0.013756, 0.163641, -0.243676, 0.071848, -0.274356, 0.002456, 0.000325, 0.000473, -0.000957, -8e-05, 0.000412, 0.001552, 0.044595, -0.000693, 0.306059, 0.004185, 0.037753, -0.354359, -0.093002, 0.026644, 0.021149, -0.518293, -0.045648], [0.260613, 0.028525, -0.189127, -0.023603, -0.001959, -0.033108, -0.00786, 0.247959, 0.37345, 0.008632, -0.253782, 0.358035, -0.009639, -0.00111, 0.024651, 0.013863, 0.001324, -0.03551, 0.039723, 0.003095, -0.008104, -0.256662, 0.231159, 0.271742, -0.238515, -0.26923, 0.263554, -0.06347, -0.005962, 
-0.3167], [0.02732, -0.503933, -0.004824, -0.001529, 0.033096, 0.000265, -0.278006, -0.047963, -0.213773, 0.280128, -0.022323, 0.213457, -0.002085, 0.033356, 0.000557, 0.000229, -0.002442, -0.000205, -0.001346, 0.032571, 0.000689, 0.344814, -0.014413, 0.033611, -0.345849, -0.055123, -0.031571, 0.02706, -0.497422, -0.011647], [-0.243029, -0.015413, 0.146133, 0.017338, 0.001597, -0.034753, 0.141987, -0.108124, -0.13228, 0.127118, 0.125745, -0.129654, -0.244286, -0.023175, 0.626421, 0.156923, 0.014852, -0.402382, 0.010765, 0.001148, -0.037351, -0.003716, -0.114262, -0.189468, -0.016759, 0.119611, -0.185873, 0.079683, 0.000158, 0.2727], [-0.129992, -0.004831, -0.23421, 0.030252, 0.001661, -0.020953, -0.089179, -0.380433, 0.238575, -0.131994, 0.366, 0.248641, -0.001901, -9.9e-05, -0.000742, 0.000266, 1.4e-05, 0.000101, 0.00813, 5.1e-05, 0.036216, 0.063105, 0.383908, -0.248672, 0.105684, -0.368443, -0.263388, -0.256541, -0.015635, 0.08443], [0.137464, 0.005143, 0.242566, -0.030797, -0.00168, 0.020636, 0.088352, 0.381125, -0.239902, 0.131275, -0.366799, -0.25004, -0.00032, -2.5e-05, 0.000843, 0.000346, 3.2e-05, -0.000892, 0.008606, 7.2e-05, 0.03577, 0.063585, 0.378623, -0.24516, 0.10549, -0.363122, -0.259586, -0.263422, -0.01603, 0.084123], [-0.00225, 0.015119, -0.002221, -0.003407, 0.064135, 0.000736, -0.105943, -0.405293, 0.273762, 0.149342, -0.382645, -0.28043, -7e-06, -0.0, -3e-06, -1e-06, -0.0, 1e-06, 0.003707, -0.063847, -0.001032, 0.071628, 0.403343, -0.283035, -0.114852, 0.381242, 0.295054, -0.001409, -0.015189, 0.000429], [-0.002191, 0.012816, -0.002365, -0.003392, 0.063928, 0.000754, -0.105407, -0.403473, 0.272501, 0.148733, -0.381208, -0.279259, -9.2e-05, 0.001439, 2.3e-05, -1.1e-05, 0.000198, 3e-06, -0.00376, 0.064192, 0.001056, -0.071941, -0.405068, 0.284141, 0.115391, -0.383021, -0.296342, 0.00191, 0.013037, -0.000561], [0.365531, 0.013775, 0.617789, -0.021935, -0.000328, -0.068003, -0.04347, -0.170503, 0.098247, -0.061408, 0.160508, 0.100279, -0.000658, 
-3e-05, -0.000683, 0.000301, 1.5e-05, 0.000279, -0.050627, -0.003436, 0.028616, 0.016073, 0.137715, -0.086155, 0.030639, -0.130508, -0.089509, 0.560311, 0.034, -0.167624], [-0.296552, -0.011183, -0.500391, 0.018052, 0.000246, 0.055379, 0.037985, 0.144136, -0.08304, 0.052984, -0.135246, -0.084546, -0.001503, -0.000141, 0.003611, 0.000578, 5.2e-05, -0.001372, -0.062213, -0.004237, 0.034918, 0.019339, 0.175032, -0.110577, 0.037781, -0.165787, -0.114727, 0.683407, 0.041462, -0.203925]]}, "properties": {"name": "acetone.out.gz", "totalCharge": 0, "totalSpinMultiplicity": 1}}

I wrote a short Python script which takes a middle route: indentation, but arrays of simple values (no nested arrays or objects) are flattened and printed on a single line. The result for acetone:

{
  "chemicalJson": 1,
  "atoms": {
    "coords": {
      "3d": [1.802505, 0.08453, 1.255788, 1.247226, 0.06357, 0.324074, 1.474887, 0.957917, -0.257488, 1.572715, -0.78735, -0.275927, -0.233666, -0.02254, 0.59907, -0.674746, -0.064218, 1.730045, -1.135923, -0.052969, -0.60973, -0.987846, 0.841217, -1.21666, -0.890646, -0.904126, -1.24637, -2.174517, -0.116041, -0.302793]
    },
    "elements": {
      "number": [1, 6, 1, 1, 6, 8, 6, 1, 1, 1]
    },
    "formalCharges": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  },
  "partialCharges": {
    "Gasteiger": [0.192789, -0.474681, 0.168066, 0.16845, 0.226622, -0.335922, -0.474645, 0.168344, 0.168176, 0.1928]
  },
  "bonds": {
    "connections": {
      "index": [8, 6, 7, 6, 6, 9, 6, 4, 3, 1, 2, 1, 1, 4, 1, 0, 4, 5]
    },
    "order": [1, 1, 1, 1, 1, 1, 1, 1, 2]
  },
  "vibrations": {
    "frequencies": [136.5, 383.94, 485.15, 529.21, 776.54, 877.06, 877.98, 1080.88, 1114.37, 1224.41, 1381.82, 1383.69, 1465.8, 1466.57, 1475.84, 1491.89, 1756.48, 2994.74, 3002.26, 3052.84, 3060.64, 3119.3, 3120.51],
    "modes": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
    "intensities": [1e-06, 0.000305, 7.2e-05, 0.002438, 0.000418, 0.002123, 1.7e-05, 1e-06, 0.000641, 0.014832, 0.012011, 0.004519, 0.000159, 7e-06, 0.006705, 0.004643, 0.033536, 0.000533, 0.002095, 0.0, 0.005232, 0.003321, 0.002348],
    "eigenVectors": [
      [0.022756, -0.398597, -0.004629, 0.002212, -0.037669, -0.00048, 0.016237, 0.194007, 0.355946, -0.038265, 0.198553, -0.352348, 0.001524, -0.024424, -0.000378, -0.004332, 0.075315, 0.001022, 0.002043, -0.037378, -0.000516, -0.274747, 0.180823, 0.248296, 0.251272, 0.218179, -0.241257, 0.022646, -0.403115, -0.005863],
      [0.196274, 0.016017, -0.332489, -0.108953, -0.003664, -0.149005, -0.297801, -0.020218, -0.247853, -0.296686, -0.005798, -0.247293, -0.04696, -0.00434, 0.120408, -0.047402, -0.004517, 0.121398, 0.180873, 0.010465, -0.0354, 0.385442, 0.014304, 0.020681, 0.386819, 0.028724, 0.019991, 0.080905, 0.009336, -0.377305],
      [-0.018647, 0.317255, 0.003943, -0.000689, 0.020864, -0.000121, -0.399947, 0.128972, 0.002877, 0.386284, 0.173139, 0.000621, 0.015288, -0.273835, -0.004082, -0.006098, 0.096295, 0.001268, -0.000972, 0.02074, 0.000876, 0.280736, 0.162446, 0.271665, -0.296394, 0.139308, -0.265078, -0.017612, 0.317669, 0.005487],
      [-0.476322, -0.030174, 0.286719, -0.177818, -0.011501, 0.109797, 0.006224, 0.006746, 0.210765, 0.007888, -0.010109, 0.209386, -0.12549, -0.007453, -0.049187, 0.290247, 0.014831, 0.11378, -0.056213, 9e-06, -0.201586, 0.148453, 0.003324, -0.147621, 0.146694, 0.020204, -0.151149, -0.155799, 0.000358, -0.534192],
      [-0.498461, -0.030497, 0.139272, -0.301915, -0.016764, 0.030059, -0.22384, -0.012159, 0.072338, -0.227633, -0.015437, 0.072766, 0.053039, 0.004943, -0.136267, 0.063488, 0.006092, -0.162999, 0.201476, 0.007276, 0.227712, 0.11869, 0.004869, 0.208208, 0.116707, 0.00196, 0.206313, 0.271651, 0.008722, 0.440425],
      [0.349312, -0.002142, -0.145296, -0.075322, 0.004343, 0.112729, -0.34087, -0.072433, -0.112405, -0.433834, 0.007799, -0.093556, 0.111696, 0.005051, 0.043671, 0.017917, 0.000912, 0.00693, 0.021988, -0.006527, -0.133409, -0.32921, 0.038756, -0.149392, -0.381422, -0.048308, -0.22649, 0.156216, 0.027682, 0.343192],
      [0.012054, 0.23351, -0.008117, -0.001084, -0.096396, 0.007502, -0.478265, 0.0842, 0.078744, 0.406068, 0.142155, -0.091585, 0.008944, 0.000193, 0.003411, 0.001496, 0.000121, 0.00051, -0.003222, 0.096352, -0.008999, -0.290094, -0.121365, -0.380809, 0.244988, -0.110753, 0.346246, 0.026235, -0.231435, 0.024636],
      [-0.483762, -0.030257, 0.156337, -0.019856, 0.001034, -0.129631, 0.295967, 0.081802, 0.12599, 0.298282, -0.052167, 0.125694, -0.011797, -0.001229, 0.030529, -0.037445, -0.003524, 0.095909, 0.102321, 0.006876, -0.081641, -0.307058, 0.051676, -0.10825, -0.30071, -0.081672, -0.111502, 0.249548, 0.006952, 0.443138],
      [0.01834, -0.295018, -0.003678, -0.006897, 0.128697, 0.001892, 0.397551, -0.083224, -0.139584, -0.38766, -0.129527, 0.136054, 0.012344, -0.227384, -0.003618, -0.002269, 0.041548, 0.000616, -0.007179, 0.128472, 0.002246, -0.188724, -0.110732, -0.369328, 0.200924, -0.101438, 0.365914, 0.016228, -0.294425, -0.005356],
      [0.548308, 0.033966, -0.195033, 0.084884, 0.003449, 0.083497, 0.055488, -0.117176, -0.141968, 0.044071, 0.126308, -0.140246, -0.346301, -0.016973, -0.135684, 0.056481, 0.002751, 0.022161, 0.119047, 0.006605, -0.003664, -0.065959, 0.116175, 0.139682, -0.053204, -0.127288, 0.134921, 0.271268, 0.006792, 0.516412],
      [0.233378, 0.013272, -0.186574, -0.107463, -0.005946, 0.010234, 0.364901, -0.126223, -0.004488, 0.349832, 0.165798, 0.000245, 0.065084, 0.003317, 0.026157, -0.008875, -0.000465, -0.003607, -0.082367, -0.002789, -0.096984, 0.299955, 0.195196, 0.306013, 0.318188, -0.171507, 0.299991, 0.061036, -0.004592, 0.364068],
      [-0.313784, -0.017689, 0.217713, 0.104539, 0.005903, -0.025915, -0.423999, 0.177505, 0.052703, -0.403042, -0.224181, 0.046327, -0.007971, -0.000522, 0.00342, 0.001261, 8.5e-05, -0.000933, -0.045521, -0.001117, -0.074496, 0.210519, 0.179623, 0.272247, 0.228051, -0.164797, 0.266797, 0.073988, -0.002618, 0.314733],
      [-0.248064, -0.083496, 0.179555, 0.015343, 0.00637, 0.032471, 0.011567, -0.258917, -0.389149, 0.046549, 0.258587, -0.32599, 0.016093, 0.000765, 0.006398, -0.004889, -0.000259, -0.002056, 0.03409, -0.004028, -0.013245, -0.28346, 0.244741, 0.287408, -0.162237, -0.267594, 0.280351, -0.067555, 0.073768, -0.304872],
      [-0.067114, 0.510277, 0.03306, 0.004718, -0.043871, 0.004614, 0.24914, 0.013756, 0.163641, -0.243676, 0.071848, -0.274356, 0.002456, 0.000325, 0.000473, -0.000957, -8e-05, 0.000412, 0.001552, 0.044595, -0.000693, 0.306059, 0.004185, 0.037753, -0.354359, -0.093002, 0.026644, 0.021149, -0.518293, -0.045648],
      [0.260613, 0.028525, -0.189127, -0.023603, -0.001959, -0.033108, -0.00786, 0.247959, 0.37345, 0.008632, -0.253782, 0.358035, -0.009639, -0.00111, 0.024651, 0.013863, 0.001324, -0.03551, 0.039723, 0.003095, -0.008104, -0.256662, 0.231159, 0.271742, -0.238515, -0.26923, 0.263554, -0.06347, -0.005962, -0.3167],
      [0.02732, -0.503933, -0.004824, -0.001529, 0.033096, 0.000265, -0.278006, -0.047963, -0.213773, 0.280128, -0.022323, 0.213457, -0.002085, 0.033356, 0.000557, 0.000229, -0.002442, -0.000205, -0.001346, 0.032571, 0.000689, 0.344814, -0.014413, 0.033611, -0.345849, -0.055123, -0.031571, 0.02706, -0.497422, -0.011647],
      [-0.243029, -0.015413, 0.146133, 0.017338, 0.001597, -0.034753, 0.141987, -0.108124, -0.13228, 0.127118, 0.125745, -0.129654, -0.244286, -0.023175, 0.626421, 0.156923, 0.014852, -0.402382, 0.010765, 0.001148, -0.037351, -0.003716, -0.114262, -0.189468, -0.016759, 0.119611, -0.185873, 0.079683, 0.000158, 0.2727],
      [-0.129992, -0.004831, -0.23421, 0.030252, 0.001661, -0.020953, -0.089179, -0.380433, 0.238575, -0.131994, 0.366, 0.248641, -0.001901, -9.9e-05, -0.000742, 0.000266, 1.4e-05, 0.000101, 0.00813, 5.1e-05, 0.036216, 0.063105, 0.383908, -0.248672, 0.105684, -0.368443, -0.263388, -0.256541, -0.015635, 0.08443],
      [0.137464, 0.005143, 0.242566, -0.030797, -0.00168, 0.020636, 0.088352, 0.381125, -0.239902, 0.131275, -0.366799, -0.25004, -0.00032, -2.5e-05, 0.000843, 0.000346, 3.2e-05, -0.000892, 0.008606, 7.2e-05, 0.03577, 0.063585, 0.378623, -0.24516, 0.10549, -0.363122, -0.259586, -0.263422, -0.01603, 0.084123],
      [-0.00225, 0.015119, -0.002221, -0.003407, 0.064135, 0.000736, -0.105943, -0.405293, 0.273762, 0.149342, -0.382645, -0.28043, -7e-06, -0.0, -3e-06, -1e-06, -0.0, 1e-06, 0.003707, -0.063847, -0.001032, 0.071628, 0.403343, -0.283035, -0.114852, 0.381242, 0.295054, -0.001409, -0.015189, 0.000429],
      [-0.002191, 0.012816, -0.002365, -0.003392, 0.063928, 0.000754, -0.105407, -0.403473, 0.272501, 0.148733, -0.381208, -0.279259, -9.2e-05, 0.001439, 2.3e-05, -1.1e-05, 0.000198, 3e-06, -0.00376, 0.064192, 0.001056, -0.071941, -0.405068, 0.284141, 0.115391, -0.383021, -0.296342, 0.00191, 0.013037, -0.000561],
      [0.365531, 0.013775, 0.617789, -0.021935, -0.000328, -0.068003, -0.04347, -0.170503, 0.098247, -0.061408, 0.160508, 0.100279, -0.000658, -3e-05, -0.000683, 0.000301, 1.5e-05, 0.000279, -0.050627, -0.003436, 0.028616, 0.016073, 0.137715, -0.086155, 0.030639, -0.130508, -0.089509, 0.560311, 0.034, -0.167624],
      [-0.296552, -0.011183, -0.500391, 0.018052, 0.000246, 0.055379, 0.037985, 0.144136, -0.08304, 0.052984, -0.135246, -0.084546, -0.001503, -0.000141, 0.003611, 0.000578, 5.2e-05, -0.001372, -0.062213, -0.004237, 0.034918, 0.019339, 0.175032, -0.110577, 0.037781, -0.165787, -0.114727, 0.683407, 0.041462, -0.203925]
    ]
  },
  "properties": {
    "name": "acetone.out.gz",
    "totalCharge": 0,
    "totalSpinMultiplicity": 1
  }
}

Crucially, this doesn't really increase the size of the repo (unlike universal indentation) -- fragments goes down in size by 0.3 MiB, while molecules increases by 0.2 MiB.

@matterhorn103
Copy link
Contributor Author

For reference, the script used for this was the following, which includes a validation step to make sure that the newly formatted CJSON files are parsed to the exact same JSON object as before:

import json
from pathlib import Path


def recursive_search(path: Path):
    """Return every file found under ``path``, descending into subdirectories.

    Files sitting directly in ``path`` are listed first, followed by the
    results for each subdirectory in turn.
    """
    found = []
    subdirs = []
    # Sort the directory entries into files and subdirectories
    for entry in path.iterdir():
        if entry.is_file():
            found.append(entry)
        elif entry.is_dir():
            subdirs.append(entry)
    # Append the contents of each subdirectory after the local files
    for subdir in subdirs:
        found += recursive_search(subdir)
    return found

# Start from the directory that contains this script and gather every file below it
root = Path(__file__).parent
print(root)
file_list = recursive_search(root)
# Keep only the entries whose extension marks them as CJSON
cjson_list = [entry for entry in file_list if entry.suffix == ".cjson"]

def flatten_arrays(data: dict) -> dict:
    """Replace each list of plain values with its compact JSON text.

    Dicts and lists that contain further dicts or lists are walked
    recursively; any other value is handed back untouched.
    """
    # Dicts: process every value, keeping the keys as they are
    if isinstance(data, dict):
        return {key: flatten_arrays(value) for key, value in data.items()}
    # Anything that is not a container is already in its final form
    if not isinstance(data, list):
        return data
    # A list holding at least one container must stay a list
    holds_containers = any(isinstance(item, (dict, list)) for item in data)
    if holds_containers:
        return list(map(flatten_arrays, data))
    # Only plain values inside: collapse the list to a one-line string
    return json.dumps(data)
    
def _render_key(key) -> str:
    """Return the JSON text for a dict key, coerced the way ``json.dumps`` would."""
    if isinstance(key, str):
        return json.dumps(key)
    # Round-trip through json so numbers, booleans and None become the same
    # key strings that json.dumps itself would emit
    coerced = next(iter(json.loads(json.dumps({key: None}))))
    return json.dumps(coerced)


def _render_flat(value, depth: int) -> str:
    """Serialize ``value`` as JSON text, given that it starts at nesting level ``depth``."""
    inner_pad = "  " * (depth + 1)
    closing_pad = "  " * depth
    if isinstance(value, dict):
        if not value:
            return "{}"
        members = [
            f"{inner_pad}{_render_key(k)}: {_render_flat(v, depth + 1)}"
            for k, v in value.items()
        ]
        return "{\n" + ",\n".join(members) + "\n" + closing_pad + "}"
    # Only lists that contain further dicts/lists are spread over several lines
    if isinstance(value, list) and any(isinstance(i, (dict, list)) for i in value):
        items = [inner_pad + _render_flat(i, depth + 1) for i in value]
        return "[\n" + ",\n".join(items) + "\n" + closing_pad + "]"
    # Simple lists and scalars are written compactly on one line
    return json.dumps(value)


def flatten_dumps(data: dict) -> str:
    """Do the same as json.dumps(data, indent=2) but write simple lists on a single line.

    A "simple" list is one whose items are neither dicts nor lists. Dicts, and
    lists that contain dicts or lists, are indented by two spaces per level.

    The text is built directly rather than by serializing lists to strings and
    stripping the quotes afterwards. That approach corrupted any genuine string
    that starts with ``[`` or ends with ``]``, and mangled quotes, backslashes
    and non-ASCII characters inside single-line lists.

    Args:
        data: Any JSON-serializable object, typically the dict loaded from a
            CJSON file.

    Returns:
        JSON text that parses back to the same object as ``json.dumps(data)``.

    Raises:
        TypeError: If ``data`` contains something ``json.dumps`` cannot serialize.
    """
    return _render_flat(data, 0)

# Maps each processed file to whether its reformatted text parsed back to the original object
checks = {}

# Read, reformat and validate each cjson, and only then overwrite it
for file in cjson_list:
    # JSON text is UTF-8 by specification, so don't rely on the platform default encoding
    with open(file, encoding="utf-8") as f:
        cjson = json.load(f)
    formatted = flatten_dumps(cjson)
    if file.name == "acetone.cjson":
        # Sample output for visual comparison: compact, flattened, and fully indented
        print(json.dumps(cjson))
        print(formatted)
        print(json.dumps(cjson, indent=2))
    # Test we get the same object back as we originally read; text that
    # cannot be parsed at all counts as a failed check rather than a crash
    try:
        check = cjson == json.loads(formatted)
    except json.JSONDecodeError:
        check = False
    checks[file] = check
    # Write only after validation so a formatting bug can never destroy the original file
    if check:
        with open(file, "w", encoding="utf-8") as f:
            f.write(formatted)

print(checks)
for k, v in checks.items():
    if not v:
        print(f"{k} was not validated")

@ghutchis ghutchis merged commit 8a37883 into OpenChemistry:master Jan 6, 2025
1 check passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants