View Javadoc
/**
    Copyright (C) 2002-2003  Together

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/

package org.relique.jdbc.csv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;


/**
 * This class is a helper class that handles the reading and parsing of data
 * from a .csv file.
 *
 * <p>The reader walks the file line by line ({@link #next()}), splits each
 * line into column values and exposes them through {@link #getColumn(int)} /
 * {@link #getColumn(String)}. When the current file is exhausted it looks for
 * a continuation file whose name is derived by {@code getNextFileName()} and
 * transparently continues reading from it.</p>
 *
 * @author     Zoran Milakovic
 */

public class CsvReader
{
  private BufferedReader bufReader;
  private CsvRandomAccessFile randomReader;
  private String[] columnNames;
  // Column types in header order, filled by parseCsvLineAsHeader().
  private Vector colTypes = new Vector();
  // Column name -> type, filled by parseCsvLineAsHeader() from the header names.
  private Map columnTypes1 = new KeyInsensitiveHashMap();
  // Column name -> type as exposed to callers and used by parseCsvLine().
  private Map columnTypes = new KeyInsensitiveHashMap();
  // Values of the row most recently parsed by next().
  private String[] columns;
  // First data line read ahead by the constructor when headers are suppressed.
  private java.lang.String buf = null;
  private char separator = CsvDriver.DEFAULT_SEPARATOR;
  private long maxFileSize = CsvDriver.DEFAULT_FILE_MAXSIZE;
  private String extension = CsvDriver.DEFAULT_EXTENSION;
  private boolean suppressHeaders = false;
  private String lineBreakEscape = CsvDriver.DEFAULT_LINE_BREAK_ESCAPE;
  private String doubleQuoteEscape = CsvDriver.DEFAULT_DOUBLE_QUOTE_ESCAPE;
  private String carriageReturnEscape = CsvDriver.DEFAULT_CARRIAGE_RETURN_ESCAPE;
  private String tableName;
  private String fileName;
  private String charset = null;
  private boolean trimString = false;


  /**
   * Opens the given file and reads its first line, either as the header
   * (column names) or, when headers are suppressed, as the first data row
   * used to determine the number of columns.
   *
   * @param fileName path of the csv file to read
   * @param separator character that separates column values
   * @param suppressHeaders if true the file has no header line; columns are
   *        named COLUMN1, COLUMN2, ...
   * @param charset name of the character set used to decode the file, or
   *        null to use the platform default
   * @param extension file extension; when null the driver default is kept
   * @param lineBreakEscape text that is replaced by "\n" in returned values
   * @param carriageReturnEscape text that is replaced by "\r" in returned values
   * @param trimString if true, parsed values are trimmed
   * @throws java.lang.Exception if the file cannot be opened or its first
   *         line cannot be read or parsed
   */
  public CsvReader(
      String fileName,
      char separator,
      boolean suppressHeaders,
      String charset,
      String extension,
      String lineBreakEscape,
      String carriageReturnEscape,
      boolean trimString
      )
       throws java.lang.Exception
  {
    this.separator = separator;
    this.suppressHeaders = suppressHeaders;
    this.fileName = fileName;
    this.charset = charset;
    this.lineBreakEscape = lineBreakEscape;
    this.carriageReturnEscape = carriageReturnEscape;
    this.trimString = trimString;
    if( extension != null )
      this.extension = extension;

    openInputs();
    try
    {
      if (this.suppressHeaders)
      {
        // No column names available.
        // Read first data line and determine number of columns.
        // The line is kept in buf so that next() returns it as the first row.
        buf = this.readLine();
        String[] data = parseCsvLineAsHeader(buf);
        columnNames = new String[data.length];
        String[] columnType = new String[colTypes.size()];
        colTypes.copyInto(columnType);
        for (int i = 0; i < data.length; i++)
        {
          columnNames[i] = "COLUMN" + String.valueOf(i+1);
          columnTypes.put(columnNames[i],columnType[i]);
        }
      }
      else
      {
        String headerLine = this.readLine();
        columnNames = parseCsvLineAsHeader(headerLine);
        columnTypes = columnTypes1;
      }
    }
    catch (Exception e)
    {
      // Do not leak the file handle when the first line cannot be processed.
      closeQuietly();
      throw e;
    }
  }


  /**
   * Gets the columnNames attribute of the CsvReader object
   *
   * @return    The columnNames value
   */
  public String[] getColumnNames()
  {
    return columnNames;
  }

  /**
   * Gets the column types keyed by column name.
   *
   * @return map from column name to column type
   */
  public Map getColumnTypes()
  {
    return columnTypes;
  }


  /**
   * Derives the table name from the file name: the directory part (up to the
   * last '/' or '\') and the file extension are removed. The result is
   * computed once and cached.
   *
   * @return the table name
   */
  public String getTableName() {
    if(tableName != null)
      return tableName;

    // -1 when the name has no directory part, so that the first character of
    // a bare file name is kept.
    int lastSlash = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
    String baseName = fileName.substring(lastSlash + 1);

    int end = baseName.length();
    String ext = this.extension;
    if (ext != null
        && baseName.length() >= ext.length()
        && baseName.regionMatches(true, baseName.length() - ext.length(), ext, 0, ext.length())) {
      // Strip the configured extension, whatever its length.
      end -= ext.length();
    } else if (baseName.length() >= 4) {
      // Legacy behaviour: assume a 4 character extension such as ".csv".
      end -= 4;
    }
    tableName = baseName.substring(0, end);
    return tableName;
  }

  /**
   * Get the value of the column at the specified index.
   *
   * @param  columnIndex  zero-based index into the current row
   * @return              the column value with escape sequences replaced, or
   *                      null when the index is beyond the last column of
   *                      the current row
   * @exception SQLException if the value cannot be reformatted
   */

  public String getColumn(int columnIndex) throws SQLException
  {
    if (columnIndex >= columns.length)
    {
        return null;
    }
    return formatString( columns[columnIndex] );
  }

  /**
   * Get value from column at specified name.
   * If the column name is not found, throw an error.
   * The name is matched case-insensitively and may be prefixed with the
   * table name and a dot.
   *
   * @param  columnName        name of the column, optionally table-qualified
   * @return                   The column value
   * @exception  SQLException  if no column with that name exists
   */

  public String getColumn(String columnName) throws SQLException
  {
    for (int loop = 0; loop < columnNames.length; loop++)
    {
      if (columnName.equalsIgnoreCase(columnNames[loop])
          || columnName.equalsIgnoreCase(getTableName() + "."
                                         + columnNames[loop]))
      {
        return getColumn(loop);
      }
    }
    throw new SQLException("Column '" + columnName + "' not found.");
  }


  /**
   * Advances to the next data row and parses it into column values.
   * When the current file is exhausted, the reader is closed and the
   * continuation file (see getNextFileName()) is opened if it exists; its
   * first line is skipped as a header.
   *
   * @return                   true if a row was read, false when there is no
   *                           more data
   * @exception  SQLException  if reading or parsing fails
   */
  public boolean next() throws SQLException {
    columns = new String[columnNames.length];
    String dataLine = null;
    try {
      if (suppressHeaders && (buf != null)) {
        // The buffer is not empty yet, so use this first.
        dataLine = buf;
        buf = null;
      } else {
        // read new line of data from input.
        dataLine = this.readLine();
      }
      while (dataLine == null) {
        // Current file is exhausted: release it before moving on.
        this.closeInputs();
        String nextFileName = getNextFileName();
        if (!new File(nextFileName).exists()) {
          return false;
        }
        this.fileName = nextFileName;
        openInputs();
        // skip header
        this.readLine();
        // May be null when the continuation file holds only a header; the
        // loop then tries the following file instead of parsing null.
        dataLine = this.readLine();
      }
    } catch (IOException e) {
      throw toSqlException(e);
    }
    columns = parseCsvLine(dataLine);
    return true;
  }


  /**
   * Computes the name of the file that continues the current one.
   * If the current extension ends with CsvDriver.FILE_NAME_EXT, the three
   * characters before the extension are read as a number which is
   * incremented and zero-padded to at least three digits. Otherwise the
   * name without extension is upper-cased and "001", the configured
   * extension and CsvDriver.FILE_NAME_EXT are appended.
   *
   * @return the candidate name of the next file (it may not exist)
   * @throws NumberFormatException if the three characters before the
   *         extension are not a positive number
   */
  private String getNextFileName() {
    String currentFileName = this.fileName;
    int dot = currentFileName.lastIndexOf(".");
    // A name without any dot has an empty extension.
    String currentFileExtension = dot < 0 ? "" : currentFileName.substring(dot);
    String nameWithoutExtension = dot < 0 ? currentFileName : currentFileName.substring(0, dot);

    if( !currentFileExtension.endsWith(CsvDriver.FILE_NAME_EXT) ) {
      return nameWithoutExtension.toUpperCase() + "001" + this.extension + CsvDriver.FILE_NAME_EXT;
    }

    int numberStart = nameWithoutExtension.length() - 3;
    String digits = nameWithoutExtension.substring(numberStart);
    long num = Long.valueOf(digits).longValue() + 1;
    if( num < 1 ) {
      throw new NumberFormatException("Invalid file sequence number : " + digits);
    }
    // Always build the name from the incremented number; reusing the old
    // digits would return the current file again and make next() loop.
    String number = String.valueOf( num );
    while( number.length() < 3 ) {
      number = "0" + number;
    }
    return nameWithoutExtension.substring(0, numberStart) + number + currentFileExtension;
  }


  /**
   * Closes the underlying input and discards any buffered line.
   * Errors while closing are ignored.
   */
  public void close()
  {
    closeQuietly();
    buf = null;
  }


  /**
   *
   * Parse csv line with columnTypes.
   *
   * <p>Columns typed as CsvDriver.BINARY_TYPE are copied verbatim from
   * between their double quotes. Other columns may be quoted; inside quotes
   * a doubled double quote stands for one double quote and the separator is
   * ordinary data. An unquoted empty value is returned as null, a quoted
   * empty value as an empty string. If a quoted value is still open at the
   * end of the line, the next line of the input is read and parsing
   * continues.</p>
   *
   * <p>NOTE: a null {@code line} is concatenated with the separator and is
   * therefore parsed as the text "null".</p>
   *
   * @param line the data line to parse
   * @return array with values or column names.
   * @throws SQLException if the line has more values than there are column
   *         types, if the input ends inside a quoted value, or if reading a
   *         continuation line fails
   */
  protected String[] parseCsvLine(String line) throws SQLException
  {
    ArrayList values = new ArrayList();
    boolean inQuotedString = false;
    String value = "";
    int currentPos = 0;
    boolean fullLine = false;
    int currentColumn = 0;
    char currentChar;
    while (!fullLine) {
      // The trailing separator terminates the last value. It has to be
      // appended (and the length recomputed) for every physical line.
      line += separator;
      int lineLength = line.length();
      currentPos = 0;
      while (currentPos < lineLength) {

        // More values than known columns.
        if( this.columnTypes.size() <= currentColumn ) {
          throw new SQLException("Invalid csv format : file = "+new File(fileName).getAbsolutePath()+", line = "+line);
        }

        //handle BINARY columns
        if (this.columnTypes.get(columnNames[currentColumn]).equals(CsvDriver.BINARY_TYPE)) {
          String binaryValue = "";
          currentChar = line.charAt(currentPos);
          if (currentChar == separator) {
            // nothing between two separators : empty binary value
            values.add(binaryValue);
            currentPos ++;
          }
          else if (currentChar == '"') {
            if (line.charAt(currentPos + 1) == '"') {
              // "" : empty binary value; skip both quotes and the separator
              values.add(binaryValue);
              currentPos = currentPos + 3;
            }
            else {
              // take all until next separator, and that is value
              // binary value is always between quotes (")
              binaryValue = line.substring(currentPos);
              binaryValue = binaryValue.substring(1,
                                                  binaryValue.indexOf(separator) -
                                                  1);
              values.add(binaryValue);
              // skip the value, its two quotes and the separator
              currentPos += binaryValue.length() + 3;
            }
          }
          currentColumn++;
          continue;
        }

        //parse one by one character
        currentChar = line.charAt(currentPos);
        if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
          //enter here if we are at start of column value
          currentPos++;
          inQuotedString = true;
          continue;
        }

        if (currentChar == '"') {
          //get next character
          char nextChar = line.charAt(currentPos + 1);
          if (nextChar == '"') {
            //if we have "", consider it as ", and add it to value
            value += currentChar;
            currentPos++;
          }
          else if (inQuotedString && nextChar == separator) {
            //enter here if we are at end of quoted column value
            //set currentPos to separator after value
            currentPos++;
            //value is empty string between double quotes : keep it as empty string
            if (this.trimString)
              value=value.trim();
            values.add(value);
            currentColumn++;
            value = "";
            inQuotedString = false;
          }
          else if (nextChar != separator) {
            value += currentChar;
          }
        }
        else if (currentChar == separator) {
          if (inQuotedString) {
            //when have separator in data
            value += currentChar;
          }
          else {
            //end of an unquoted column value
            //value is empty string between separators : consider it as null value
            if (this.trimString)
              value=value.trim();
            if( value.equals("") )
              value = null;

            values.add(value);
            currentColumn++;
            value = "";
          }
        }
        else {
          value += currentChar;
        }

        currentPos++;
      } //end while

      if (inQuotedString) {
        // Remove the separator appended to this line; the value continues
        // on the next line of the input.
        value = value.substring(0, value.length() - 1);
        try {
          line = this.readLine();
        }
        catch (IOException e) {
          throw toSqlException(e);
        }
        if (line == null) {
          throw new SQLException("Invalid csv format : file = "+new File(fileName).getAbsolutePath()
                                 +", end of file inside quoted value");
        }
      }
      else {
        fullLine = true;
      }

    }// end while( !fullLine )
    String[] retVal = new String[values.size()];
    values.toArray(retVal);

    return retVal;
  }


  /**
   *
   * Parse csv line, without columnTypes.
   *
   * <p>Every parsed name is registered in the column type tables: a name
   * ending with "-" + CsvDriver.BINARY_TYPE is recorded (without that
   * suffix) as a binary column, any other name as a varchar column. Quoting
   * rules are the same as in parseCsvLine().</p>
   *
   * <p>NOTE: a null {@code line} is concatenated with the separator and is
   * therefore parsed as the single name "null".</p>
   *
   * @param line the header line to parse
   * @return array with values or column names.
   * @throws SQLException if the input ends inside a quoted value or reading
   *         a continuation line fails
   */
  protected String[] parseCsvLineAsHeader(String line) throws SQLException
  {
    ArrayList values = new ArrayList();
    boolean inQuotedString = false;
    String value = "";
    int currentPos = 0;
    boolean fullLine = false;

    while (!fullLine) {
      currentPos = 0;
      // The trailing separator terminates the last name.
      line += separator;
      while (currentPos < line.length()) {
        char currentChar = line.charAt(currentPos);
        if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
          // opening quote of a name
          currentPos++;
          inQuotedString = true;
          continue;
        }
        if (currentChar == '"') {
          char nextChar = line.charAt(currentPos + 1);
          if (nextChar == '"') {
            // "" inside a name stands for a single "
            value += currentChar;
            currentPos++;
          }
          else if (inQuotedString && nextChar == separator) {
            // closing quote : the name is complete
            values.add(registerHeaderColumn(value));
            value = "";
            inQuotedString = false;
            currentPos++;
          }
          else if (nextChar != separator) {
            value += currentChar;
          }
        }
        else if (currentChar == separator) {
          if (inQuotedString) {
            value += currentChar;
          }
          else {
            values.add(registerHeaderColumn(value));
            value = "";
          }
        }
        else {
          value += currentChar;
        }
        currentPos++;
      }
      if (inQuotedString) {
        // Remove the separator appended to this line; the name continues on
        // the next line of the input.
        value = value.substring(0, value.length() - 1);
        try {
          line = this.readLine();
        }
        catch (IOException e) {
          throw toSqlException(e);
        }
        if (line == null) {
          // Without this check the loop would never terminate at end of file.
          throw new SQLException("Invalid csv format : file = "+new File(fileName).getAbsolutePath()
                                 +", end of file inside quoted value");
        }
      }
      else {
        fullLine = true;
      }
    }
    String[] retVal = new String[values.size()];
    values.toArray(retVal);

    return retVal;
  }

  /**
   * Records one header name and its type in columnTypes1 and colTypes.
   *
   * @param rawName the name as parsed from the header line
   * @return the name to use for the column: trimmed when trimString is set
   *         and without the binary type suffix
   */
  private String registerHeaderColumn(String rawName) {
    String name = rawName;
    if (this.trimString)
      name = name.trim();
    String binarySuffix = "-" + CsvDriver.BINARY_TYPE;
    if (name.endsWith(binarySuffix)) {
      // Cut the suffix at the end of the name, not at its first occurrence.
      name = name.substring(0, name.length() - binarySuffix.length());
      columnTypes1.put(name, CsvDriver.BINARY_TYPE);
      colTypes.add(CsvDriver.BINARY_TYPE);
    }
    else {
      columnTypes1.put(name, CsvDriver.VARCHAR_TYPE);
      colTypes.add(CsvDriver.VARCHAR_TYPE);
    }
    return name;
  }

  /**
   * Replaces the configured line break and carriage return escape texts in
   * a column value by "\n" and "\r".
   *
   * @param str the raw column value
   * @return the value with escape texts replaced
   * @throws SQLException if the replacement fails
   */
  private String formatString(String str) throws SQLException {
    String retValue = str;
    try {
      //replace spec. characters
      retValue = Utils.replaceAll(retValue,this.lineBreakEscape, "\n");
      retValue = Utils.replaceAll(retValue,this.carriageReturnEscape, "\r");
    }catch(Exception e) {
      SQLException sqlEx = new SQLException("Error while reformat string ! : "+str);
      sqlEx.initCause(e);
      throw sqlEx;
    }
    return retValue;
  }

  /**
   * Reads the next line from whichever input is in use for the charset.
   *
   * @return the line, or null as returned by the underlying reader
   * @throws IOException if reading fails
   */
  private String readLine() throws IOException {
    if(Utils.isUTF16(this.charset)) {
      return this.randomReader.readCsvLine();
    }
    return bufReader.readLine();
  }

  /**
   * Opens the file named by fileName: through CsvRandomAccessFile when the
   * charset is reported as UTF-16 by Utils.isUTF16, otherwise through a
   * BufferedReader using the given charset, or the platform default when no
   * charset is set.
   *
   * @throws IOException if the file cannot be opened
   * @throws SQLException declared for the CsvRandomAccessFile constructor
   */
  private void openInputs() throws IOException, SQLException {
    if (charset != null) {
      if(Utils.isUTF16(charset))
        randomReader = new CsvRandomAccessFile(fileName,charset);
      else
        bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName),charset));
    } else {
      bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
    }
  }

  /**
   * Closes the input that is in use for the charset, if it was opened.
   *
   * @throws IOException if closing fails
   */
  private void closeInputs() throws IOException {
    if(!Utils.isUTF16(this.charset)) {
      if(this.bufReader != null)
        bufReader.close();
    } else {
      if(this.randomReader != null)
        randomReader.close();
    }
  }

  /**
   * Closes the input and deliberately ignores any failure; used where the
   * caller has nothing useful to do with a close error.
   */
  private void closeQuietly() {
    try
    {
      this.closeInputs();
    }
    catch (Exception ignored)
    {
      // best effort only
    }
  }

  /**
   * Wraps an exception in an SQLException with the same message text as
   * before (the exception's toString()) while keeping it as the cause.
   *
   * @param cause the original failure
   * @return the SQLException to throw
   */
  private static SQLException toSqlException(Exception cause) {
    SQLException sqlEx = new SQLException(cause.toString());
    sqlEx.initCause(cause);
    return sqlEx;
  }


}

This page was automatically generated by Maven