001/* 002 * Unit-API - Units of Measurement API for Java 003 * Copyright (c) 2005-2016, Jean-Marie Dautelle, Werner Keil, V2COM. 004 * 005 * All rights reserved. 006 * 007 * Redistribution and use in source and binary forms, with or without modification, 008 * are permitted provided that the following conditions are met: 009 * 010 * 1. Redistributions of source code must retain the above copyright notice, 011 * this list of conditions and the following disclaimer. 012 * 013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions 014 * and the following disclaimer in the documentation and/or other materials provided with the distribution. 015 * 016 * 3. Neither the name of JSR-363 nor the names of its contributors may be used to endorse or promote products 017 * derived from this software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package systems.uom.ucum.format; 031 032import static tec.uom.se.AbstractUnit.ONE; 033import si.uom.SI; 034import systems.uom.ucum.internal.format.UCUMFormatParser; 035import tec.uom.se.AbstractConverter; 036import tec.uom.se.AbstractUnit; 037import tec.uom.se.format.AbstractUnitFormat; 038import tec.uom.se.format.SymbolMap; 039import tec.uom.se.function.MultiplyConverter; 040import tec.uom.se.function.RationalConverter; 041import tec.uom.se.internal.format.TokenException; 042import tec.uom.se.internal.format.TokenMgrError; 043import tec.uom.se.unit.AnnotatedUnit; 044import tec.uom.se.unit.MetricPrefix; 045import tec.uom.se.unit.TransformedUnit; 046 047import javax.measure.Quantity; 048import javax.measure.Unit; 049import javax.measure.UnitConverter; 050import javax.measure.format.ParserException; 051 052import java.io.ByteArrayInputStream; 053import java.io.IOException; 054import java.math.BigInteger; 055import java.text.ParsePosition; 056import java.util.*; 057 058/** 059 * <p> 060 * This class provides the interface for formatting and parsing 061 * {@link AbstractUnit units} according to the 062 * <a href="http://unitsofmeasure.org/">Uniform Code for CommonUnits of 063 * Measure</a> (UCUM). 064 * </p> 065 * 066 * <p> 067 * For a technical/historical overview of this format please read 068 * <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354"> 069 * CommonUnits of Measure in Clinical Information Systems</a>. 070 * </p> 071 * 072 * <p> 073 * As of revision 1.16, the BNF in the UCUM standard contains an 074 * <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to 075 * work around the problem by modifying the BNF productions for <Term>. 076 * Once the error in the standard is corrected, it may be necessary to modify 077 * the productions in the UCUMFormatParser.jj file to conform to the standard. 078 * </p> 079 * 080 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a> 081 * @author <a href="mailto:units@catmedia.us">Werner Keil</a> 082 * @version 0.7.2, 24 March 2017 083 */ 084public abstract class UCUMFormat extends AbstractUnitFormat { 085 /** 086 * 087 */ 088 // private static final long serialVersionUID = 8586656823290135155L; 089 090 // A helper to declare bundle names for all instances 091 private static final String BUNDLE_BASE = UCUMFormat.class.getName(); 092 093 // ///////////////// 094 // Class methods // 095 // ///////////////// 096 097 /** 098 * Returns the instance for formatting/parsing using the given variant 099 * 100 * @param variant 101 * the <strong>UCUM</strong> variant to use 102 */ 103 public static UCUMFormat getInstance(Variant variant) { 104 switch (variant) { 105 case CASE_INSENSITIVE: 106 return Parsing.DEFAULT_CI; 107 case CASE_SENSITIVE: 108 return Parsing.DEFAULT_CS; 109 case PRINT: 110 return Print.DEFAULT; 111 default: 112 throw new IllegalArgumentException("Unknown variant: " + variant); 113 } 114 } 115 116 /** 117 * Returns an instance for formatting and parsing using user defined symbols 118 * 119 * @param variant 120 * the <strong>UCUM</strong> variant to use 121 * @param symbolMap 122 * the map of user defined symbols to use 123 */ 124 public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) { 125 switch (variant) { 126 case CASE_INSENSITIVE: 127 return new Parsing(symbolMap, false); 128 case CASE_SENSITIVE: 129 return new Parsing(symbolMap, true); 130 case PRINT: 131 return new Print(symbolMap); 132 default: 133 throw new IllegalArgumentException("Unknown variant: " + variant); 134 } 135 } 136 137 /** 138 * The symbol map used by this instance to map between {@link AbstractUnit 139 * Unit}s and <code>String</code>s. 140 */ 141 final SymbolMap symbolMap; 142 143 /** 144 * Get the symbol map used by this instance to map between 145 * {@link AbstractUnit Unit}s and <code>String</code>s, etc... 146 * 147 * @return SymbolMap the current symbol map 148 */ 149 @Override 150 protected SymbolMap getSymbols() { 151 return symbolMap; 152 } 153 154 // //////////////// 155 // Constructors // 156 // //////////////// 157 /** 158 * Base constructor. 159 */ 160 UCUMFormat(SymbolMap symbolMap) { 161 this.symbolMap = symbolMap; 162 } 163 164 // /////////// 165 // Parsing // 166 // /////////// 167 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException; 168 169 protected Unit<?> parse(CharSequence csq, int index) throws ParserException { 170 return parse(csq, new ParsePosition(index)); 171 } 172 173 @Override 174 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException; 175 176 // ////////////// 177 // Formatting // 178 // ////////////// 179 @SuppressWarnings({ "rawtypes", "unchecked" }) 180 public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException { 181 if (!(unknownUnit instanceof AbstractUnit)) { 182 throw new UnsupportedOperationException( 183 "The UCUM format supports only known units (AbstractUnit instances)"); 184 } 185 AbstractUnit unit = (AbstractUnit) unknownUnit; 186 CharSequence symbol; 187 CharSequence annotation = null; 188 if (unit instanceof AnnotatedUnit) { 189 AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit; 190 unit = annotatedUnit.getActualUnit(); 191 annotation = annotatedUnit.getAnnotation(); 192 } 193 String mapSymbol = symbolMap.getSymbol(unit); 194 if (mapSymbol != null) { 195 symbol = mapSymbol; 196 } else if (unknownUnit instanceof TransformedUnit) { 197 final StringBuilder temp = new StringBuilder(); 198 final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit(); 199 final UnitConverter converter = unit.getConverterTo(parentUnit); 200 final boolean printSeparator = !parentUnit.equals(ONE); 201 202 format(parentUnit, temp); 203 formatConverter(converter, printSeparator, temp); 204 205 symbol = temp; 206 } else if (unit.getBaseUnits() != null) { 207 Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits(); 208 StringBuffer app = new StringBuffer(); 209 for (AbstractUnit<?> u : productUnits.keySet()) { 210 StringBuffer temp = new StringBuffer(); 211 temp = (StringBuffer) format(u, temp); 212 if ((temp.indexOf(".") >= 0) || (temp.indexOf("/") >= 0)) { 213 temp.insert(0, '('); 214 temp.append(')'); 215 } 216 int pow = productUnits.get(u); 217 int indexToAppend; 218 if (app.length() > 0) { // Not the first unit. 219 220 if (pow >= 0) { 221 222 if (app.indexOf("1/") >= 0) { 223 indexToAppend = app.indexOf("1/"); 224 app.replace(indexToAppend, indexToAppend + 2, "/"); 225 // this statement make sure that (1/y).x will be 226 // (x/y) 227 228 } else if (app.indexOf("/") >= 0) { 229 indexToAppend = app.indexOf("/"); 230 app.insert(indexToAppend, "."); 231 indexToAppend++; 232 // this statement make sure that (x/z).y will be 233 // (x.y/z) 234 235 } else { 236 app.append('.'); 237 indexToAppend = app.length(); 238 // this statement make sure that (x).y will be (x.y) 239 } 240 241 } else { 242 app.append('/'); 243 pow = -pow; 244 245 indexToAppend = app.length(); 246 // this statement make sure that (x).y^-z will be 247 // (x/y^z), where z would be added if it has a value 248 // different than 1. 249 } 250 251 } else { // First unit. 252 253 if (pow < 0) { 254 app.append("1/"); 255 pow = -pow; 256 // this statement make sure that x^-y will be (1/x^y), 257 // where z would be added if it has a value different 258 // than 1. 259 } 260 261 indexToAppend = app.length(); 262 } 263 264 app.insert(indexToAppend, temp); 265 266 if (pow != 1) { 267 app.append(Integer.toString(pow)); 268 // this statement make sure that the power will be added if 269 // it's different than 1. 270 } 271 } 272 symbol = app; 273 } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) { 274 final StringBuilder temp = new StringBuilder(); 275 UnitConverter converter; 276 boolean printSeparator; 277 if (unit.equals(SI.KILOGRAM)) { 278 // A special case because KILOGRAM is a BaseUnit instead of 279 // a transformed unit, for compatibility with existing SI 280 // unit system. 281 format(SI.GRAM, temp); 282 converter = MetricPrefix.KILO.getConverter(); 283 printSeparator = true; 284 } else { 285 Unit<?> parentUnit = unit.getSystemUnit(); 286 converter = unit.getConverterTo(parentUnit); 287 if (parentUnit.equals(SI.KILOGRAM)) { 288 // More special-case hackery to work around gram/kilogram 289 // inconsistency 290 parentUnit = SI.GRAM; 291 converter = converter.concatenate(MetricPrefix.KILO.getConverter()); 292 } 293 format(parentUnit, temp); 294 printSeparator = !parentUnit.equals(ONE); 295 } 296 formatConverter(converter, printSeparator, temp); 297 symbol = temp; 298 } else if (unit.getSymbol() != null) { 299 symbol = unit.getSymbol(); 300 } else { 301 throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit " 302 + unit.getClass().getName() + "). " 303 + "Custom units types should override the toString() method as the default implementation uses the UCUM format."); 304 } 305 306 appendable.append(symbol); 307 if (annotation != null && annotation.length() > 0) { 308 appendAnnotation(symbol, annotation, appendable); 309 } 310 311 return appendable; 312 } 313 314 public void label(Unit<?> unit, String label) { 315 } 316 317 public boolean isLocaleSensitive() { 318 return false; 319 } 320 321 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 322 appendable.append('{'); 323 appendable.append(annotation); 324 appendable.append('}'); 325 } 326 327 /** 328 * Formats the given converter to the given StringBuffer. This is similar to 329 * what {@link ConverterFormat} does, but there's no need to worry about 330 * operator precedence here, since UCUM only supports multiplication, 331 * division, and exponentiation and expressions are always evaluated left- 332 * to-right. 333 * 334 * @param converter 335 * the converter to be formatted 336 * @param continued 337 * <code>true</code> if the converter expression should begin 338 * with an operator, otherwise <code>false</code>. This will 339 * always be true unless the unit being modified is equal to 340 * Unit.ONE. 341 * @param buffer 342 * the <code>StringBuffer</code> to append to. Contains the 343 * already-formatted unit being modified by the given converter. 344 */ 345 void formatConverter(UnitConverter converter, boolean continued, StringBuilder buffer) { 346 boolean unitIsExpression = ((buffer.indexOf(".") >= 0) || (buffer.indexOf("/") >= 0)); 347 MetricPrefix prefix = symbolMap.getPrefix(converter); 348 if ((prefix != null) && (!unitIsExpression)) { 349 buffer.insert(0, symbolMap.getSymbol(prefix)); 350 } else if (converter == AbstractConverter.IDENTITY) { 351 // do nothing 352 } else if (converter instanceof MultiplyConverter) { 353 if (unitIsExpression) { 354 buffer.insert(0, '('); 355 buffer.append(')'); 356 } 357 MultiplyConverter multiplyConverter = (MultiplyConverter) converter; 358 double factor = multiplyConverter.getFactor(); 359 long lFactor = (long) factor; 360 if ((lFactor != factor) || (lFactor < -9007199254740992L) || (lFactor > 9007199254740992L)) { 361 throw new IllegalArgumentException("Only integer factors are supported in UCUM"); 362 } 363 if (continued) { 364 buffer.append('.'); 365 } 366 buffer.append(lFactor); 367 } else if (converter instanceof RationalConverter) { 368 if (unitIsExpression) { 369 buffer.insert(0, '('); 370 buffer.append(')'); 371 } 372 RationalConverter rationalConverter = (RationalConverter) converter; 373 if (!rationalConverter.getDividend().equals(BigInteger.ONE)) { 374 if (continued) { 375 buffer.append('.'); 376 } 377 buffer.append(rationalConverter.getDividend()); 378 } 379 if (!rationalConverter.getDivisor().equals(BigInteger.ONE)) { 380 buffer.append('/'); 381 buffer.append(rationalConverter.getDivisor()); 382 } 383 } else { // All other converter type (e.g. exponential) we use the 384 // string representation. 385 buffer.insert(0, converter.toString() + "("); 386 buffer.append(")"); 387 } 388 } 389 390 // static final ResourceBundle.Control getControl(final String key) { 391 // return new ResourceBundle.Control() { 392 // @Override 393 // public List<Locale> getCandidateLocales(String baseName, Locale locale) { 394 // if (baseName == null) 395 // throw new NullPointerException(); 396 // if (locale.equals(new Locale(key))) { 397 // return Arrays.asList( 398 // locale, 399 // Locale.GERMANY, 400 // // no Locale.GERMAN here 401 // Locale.ROOT); 402 // } else if (locale.equals(Locale.GERMANY)) { 403 // return Arrays.asList( 404 // locale, 405 // // no Locale.GERMAN here 406 // Locale.ROOT); 407 // } 408 // return super.getCandidateLocales(baseName, locale); 409 // } 410 // }; 411 // } 412 413 // ///////////////// 414 // Inner classes // 415 // ///////////////// 416 417 /** 418 * Variant of unit representation in the UCUM standard 419 * 420 * @see <a href= 421 * "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> 422 * UCUM - Character Set and Lexical Rules</a> 423 */ 424 public static enum Variant { 425 CASE_SENSITIVE, CASE_INSENSITIVE, PRINT 426 } 427 428 /** 429 * The Print format is used to output units according to the "print" column 430 * in the UCUM standard. Because "print" symbols in UCUM are not unique, 431 * this class of UCUMFormat may not be used for parsing, only for 432 * formatting. 433 */ 434 private static final class Print extends UCUMFormat { 435 436 /** 437 * 438 */ 439 // private static final long serialVersionUID = 2990875526976721414L; 440 private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print")); 441 private static final Print DEFAULT = new Print(PRINT_SYMBOLS); 442 443 public Print(SymbolMap symbols) { 444 super(symbols); 445 } 446 447 @Override 448 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException { 449 throw new UnsupportedOperationException( 450 "The print format is for pretty-printing of units only. Parsing is not supported."); 451 } 452 453 @Override 454 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 455 if (symbol != null && symbol.length() > 0) { 456 appendable.append('('); 457 appendable.append(annotation); 458 appendable.append(')'); 459 } else { 460 appendable.append(annotation); 461 } 462 } 463 464 @Override 465 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException { 466 return parse(csq, new ParsePosition(0)); 467 468 } 469 } 470 471 /** 472 * The Parsing format outputs formats and parses units according to the 473 * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap 474 * is passed to its constructor. 475 */ 476 private static final class Parsing extends UCUMFormat { 477 // private static final long serialVersionUID = -922531801940132715L; 478 private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap 479 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() { 480 @Override 481 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 482 if (baseName == null) 483 throw new NullPointerException(); 484 if (locale.equals(new Locale("", "CS"))) { 485 return Arrays.asList(locale, Locale.ROOT); 486 } 487 return super.getCandidateLocales(baseName, locale); 488 } 489 })); 490 private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap 491 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() { 492 @Override 493 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 494 if (baseName == null) 495 throw new NullPointerException(); 496 if (locale.equals(new Locale("", "CI"))) { 497 return Arrays.asList(locale, Locale.ROOT); 498 } else if (locale.equals(Locale.GERMANY)) { // TODO 499 // why 500 // GERMANY? 501 return Arrays.asList(locale, 502 // no Locale.GERMAN here 503 Locale.ROOT); 504 } 505 return super.getCandidateLocales(baseName, locale); 506 } 507 })); 508 private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true); 509 private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false); 510 private final boolean caseSensitive; 511 512 public Parsing(SymbolMap symbols, boolean caseSensitive) { 513 super(symbols); 514 this.caseSensitive = caseSensitive; 515 } 516 517 @Override 518 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException { 519 // Parsing reads the whole character sequence from the parse 520 // position. 521 int start = cursor.getIndex(); 522 int end = csq.length(); 523 if (end <= start) { 524 return ONE; 525 } 526 String source = csq.subSequence(start, end).toString().trim(); 527 if (source.length() == 0) { 528 return ONE; 529 } 530 if (!caseSensitive) { 531 source = source.toUpperCase(); 532 } 533 UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes())); 534 try { 535 Unit<?> result = parser.parseUnit(); 536 cursor.setIndex(end); 537 return result; 538 } catch (TokenException e) { 539 if (e.currentToken != null) { 540 cursor.setErrorIndex(start + e.currentToken.endColumn); 541 } else { 542 cursor.setErrorIndex(start); 543 } 544 throw new ParserException(e); 545 } catch (TokenMgrError e) { 546 cursor.setErrorIndex(start); 547 throw new IllegalArgumentException(e.getMessage()); 548 } 549 } 550 551 @Override 552 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException { 553 return parse(csq, new ParsePosition(0)); 554 } 555 } 556}