1 /***
2 Copyright (C) 2002-2003 Together
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18 */
19
20 package org.relique.jdbc.csv;
21
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.io.InputStreamReader;
27 import java.sql.SQLException;
28 import java.util.ArrayList;
29 import java.util.HashMap;
30 import java.util.Map;
31 import java.util.Vector;
32
33
34 /***
35 * This class is a helper class that handles the reading and parsing of data
36 * from a .csv file.
37 *
38 * @author Zoran Milakovic
39 */
40
41 public class CsvReader
42 {
43 private BufferedReader bufReader;
44 private CsvRandomAccessFile randomReader;
45 private String[] columnNames;
46 private Vector colTypes=new Vector();
47 private Map columnTypes1 = new KeyInsensitiveHashMap();
48 private Map columnTypes = new KeyInsensitiveHashMap();
49 private String[] columns;
50 private java.lang.String buf = null;
51 private char separator = CsvDriver.DEFAULT_SEPARATOR;
52 private long maxFileSize = CsvDriver.DEFAULT_FILE_MAXSIZE;
53 private String extension = CsvDriver.DEFAULT_EXTENSION;
54 private boolean suppressHeaders = false;
55 private String lineBreakEscape = CsvDriver.DEFAULT_LINE_BREAK_ESCAPE;
56 private String doubleQuoteEscape = CsvDriver.DEFAULT_DOUBLE_QUOTE_ESCAPE;
57 private String carriageReturnEscape = CsvDriver.DEFAULT_CARRIAGE_RETURN_ESCAPE;
58 private String tableName;
59 private String fileName;
60 private String charset = null;
61 private boolean trimString = false;
62
63
64 /***
65 *
66 * @param fileName
67 * @param separator
68 * @param suppressHeaders
69 * @param charset
70 * @param extension
71 * @throws java.lang.Exception
72 */
73 public CsvReader(
74 String fileName,
75 char separator,
76 boolean suppressHeaders,
77 String charset,
78 String extension,
79 String lineBreakEscape,
80 String carriageReturnEscape,
81 boolean trimString
82 )
83 throws java.lang.Exception
84 {
85 this.separator = separator;
86 this.suppressHeaders = suppressHeaders;
87 this.fileName = fileName;
88 this.charset = charset;
89 this.lineBreakEscape = lineBreakEscape;
90 this.carriageReturnEscape = carriageReturnEscape;
91 this.trimString = trimString;
92 if( extension != null )
93 this.extension = extension;
94
95 if (charset != null) {
96 if(Utils.isUTF16(charset))
97 randomReader = new CsvRandomAccessFile(fileName,charset);
98 else
99 bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName),charset));
100 } else {
101 bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
102 }
103 if (this.suppressHeaders)
104 {
105 // No column names available.
106 // Read first data line and determine number of colums.
107 buf = this.readLine();
108 String[] data = parseCsvLineAsHeader(buf);
109 columnNames = new String[data.length];
110 String[] columnType = new String[colTypes.size()];
111 colTypes.copyInto(columnType);
112 for (int i = 0; i < data.length; i++)
113 {
114 columnNames[i] = "COLUMN" + String.valueOf(i+1);
115 columnTypes.put(columnNames[i],columnType[i]);
116 }
117 data = null;
118 }
119 else
120 {
121 String headerLine = this.readLine();
122 columnNames = parseCsvLineAsHeader(headerLine);
123 columnTypes = columnTypes1;
124 }
125 }
126
127
128 /***
129 * Gets the columnNames attribute of the CsvReader object
130 *
131 * @return The columnNames value
132 */
133 public String[] getColumnNames()
134 {
135 return columnNames;
136 }
137
138 /***
139 *
140 * @return array with column types
141 */
142 public Map getColumnTypes()
143 {
144 return columnTypes;
145 }
146
147
148
149 public String getTableName() {
150 if(tableName != null)
151 return tableName;
152
153 int lastSlash = 0;
154 for(int i = fileName.length()-1; i >= 0; i--)
155 if(fileName.charAt(i) == '/' || fileName.charAt(i) == '//') {
156 lastSlash = i;
157 break;
158 }
159 tableName = fileName.substring(lastSlash+1, fileName.length() - 4);
160 return tableName;
161 }
162
163 /***
164 * Get the value of the column at the specified index.
165 *
166 * @param columnIndex Description of Parameter
167 * @return The column value
168 * @since
169 */
170
171 public String getColumn(int columnIndex) throws SQLException
172 {
173 if (columnIndex >= columns.length)
174 {
175 return null;
176 }
177 return formatString( columns[columnIndex] );
178 }
179
180 /***
181 * Get value from column at specified name.
182 * If the column name is not found, throw an error.
183 *
184 * @param columnName Description of Parameter
185 * @return The column value
186 * @exception SQLException Description of Exception
187 * @since
188 */
189
190 public String getColumn(String columnName) throws SQLException
191 {
192 for (int loop = 0; loop < columnNames.length; loop++)
193 {
194 if (columnName.equalsIgnoreCase(columnNames[loop])
195 || columnName.equalsIgnoreCase(getTableName() + "."
196 + columnNames[loop]))
197 {
198 return getColumn(loop);
199 }
200 }
201 throw new SQLException("Column '" + columnName + "' not found.");
202 }
203
204
205 /***
206 *Description of the Method
207 *
208 * @return Description of the Returned Value
209 * @exception SQLException Description of Exception
210 * @since
211 */
212 public boolean next() throws SQLException {
213 columns = new String[columnNames.length];
214 String dataLine = null;
215 try {
216 if (suppressHeaders && (buf != null)) {
217 // The buffer is not empty yet, so use this first.
218 dataLine = buf;
219 buf = null;
220 } else {
221 // read new line of data from input.
222 dataLine = this.readLine();
223 }
224 if (dataLine == null) {
225 String nextFileName = getNextFileName();
226 if (new File(nextFileName).exists()) {
227 this.fileName = nextFileName;
228 if (charset != null) {
229 if(Utils.isUTF16(charset))
230 randomReader = new CsvRandomAccessFile(fileName,charset);
231 else
232 bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName),charset));
233 } else {
234 bufReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
235 }
236 //skip header
237 dataLine = this.readLine();
238 dataLine = this.readLine();
239 }
240 else {
241 this.closeInputs();
242 return false;
243 }
244 }
245
246 } catch (IOException e) {
247 throw new SQLException(e.toString());
248 }
249 columns = parseCsvLine(dataLine);
250 return true;
251 }
252
253
254 private String getNextFileName() {
255 String currentFileName = this.fileName;
256 String newName = "";
257 String number = "";
258 //name without extension
259 String currentFileExtension = currentFileName.substring(currentFileName.lastIndexOf("."), currentFileName.length());
260 currentFileName = currentFileName.substring(0, currentFileName.lastIndexOf("."));
261 if( currentFileExtension.endsWith(CsvDriver.FILE_NAME_EXT) ) {
262 number += currentFileName.substring(currentFileName.length()-3, currentFileName.length());
263 long num = Long.valueOf(number).longValue()+1;
264 if( num >= 100 && num < 1000 )
265 number = String.valueOf( num );
266 else if ( num >= 10 && num < 100 )
267 number = "0"+String.valueOf( num );
268 else if ( num > 1 && num < 10 )
269 number = "00"+String.valueOf( num );
270 currentFileName = currentFileName.substring(0, currentFileName.length()-3);
271 newName = currentFileName + number + currentFileExtension;
272 } else {
273 newName = currentFileName.toUpperCase() + "001" + this.extension + CsvDriver.FILE_NAME_EXT;
274 }
275 return newName;
276 }
277
278
279 /***
280 *Description of the Method
281 *
282 * @since
283 */
284 public void close()
285 {
286 try
287 {
288 this.closeInputs();
289 buf = null;
290 }
291 catch (Exception e)
292 {
293 }
294 }
295
296
297 /***
298 *
299 * Parse csv line with columnTypes.
300 *
301 * @param line
302 * @return array with values or column names.
303 * @throws SQLException
304 */
305 protected String[] parseCsvLine(String line) throws SQLException
306 {
307 ArrayList values = new ArrayList();
308 boolean inQuotedString = false;
309 String value = "";
310 String orgLine = line;
311 int currentPos = 0;
312 int fullLine = 0;
313 int currentColumn = 0;
314 int indexOfBinaryObject = 0;
315 char currentChar;
316 line += separator;
317 long lineLength = line.length();
318 while (fullLine == 0) {
319 currentPos = 0;
320 while (currentPos < lineLength) {
321
322 //handle BINARY columns
323 if( !(this.columnTypes.size() <= currentColumn ) ) {
324 if (this.columnTypes.get(columnNames[currentColumn]).equals(CsvDriver.BINARY_TYPE)) {
325 String binaryValue = "";
326 currentChar = line.charAt(currentPos);
327 if (currentChar == ',') {
328 values.add(binaryValue); //binary value is null;
329 currentPos ++;
330 }
331 else if (currentChar == '"') {
332 if (line.charAt(currentPos + 1) == '"') {
333 values.add(binaryValue); //binary value is null
334 currentPos = currentPos + 3;
335 }
336 else {
337 // take all until next separator, and that is value
338 // do not insert BinaryObject+index into line, just set right currentPos
339 // and insert value into vector
340 // binary value is always beteween quotes (")
341 binaryValue = line.substring(currentPos);
342 binaryValue = binaryValue.substring(1,
343 binaryValue.indexOf(separator) -
344 1);
345 values.add(binaryValue);
346 currentPos += binaryValue.length() + 3;
347 }
348 }
349 //set currentColumn++
350 currentColumn++;
351 continue;
352 }
353 } else {
354 throw new SQLException("Invalid csv format : file = "+new File(fileName).getAbsolutePath()+", line = "+line);
355 }
356
357
358 //parse one by one character
359 currentChar = line.charAt(currentPos);
360 if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
361 //enter here if we are at start of column value
362 currentPos++;
363 inQuotedString = true;
364 continue;
365 }
366
367 if (currentChar == '"') {
368 //get next character
369 char nextChar = line.charAt(currentPos + 1);
370 //if we have "", consider it as ", and add it to value
371 if (nextChar == '"') {
372 value += currentChar;
373 currentPos++;
374 }
375 else {
376 //enter here if we are at end of column value
377 // if (!inQuotedString) {
378 // throw new SQLException("Unexpected '\"' in position " +
379 // currentPos + ". Line=" + orgLine);
380 // }
381 if (inQuotedString && nextChar == separator) {
382 //throw new SQLException("Expecting " + separator +
383 // " in position " + (currentPos + 1) +
384 // ". Line=" + orgLine);
385
386 //set currentPos to comma after value
387 currentPos++;
388 //if value is empty string between double quotes consider it as empty string
389 //else if value is empty string between commas consider it as null value
390 if (this.trimString)
391 value=value.trim();
392 values.add(value);
393 currentColumn++;
394 value = "";
395 inQuotedString = false;
396 }
397 else {
398 if (nextChar != separator){
399 value += currentChar;
400 }
401 }
402 }
403 }
404
405 else {
406 //when we are at end of column value, and value is not inside of double quotes
407 if (currentChar == separator) {
408 //when have separator in data
409 if (inQuotedString) {
410 value += currentChar;
411 }
412 else {
413 //if value is empty string between double quotes consider it as empty string
414 //else if value is empty string between commas consider it as null value
415 if (this.trimString)
416 value=value.trim();
417 if( value.equals("") )
418 value = null;
419
420 values.add(value);
421 currentColumn++;
422 value = "";
423 }
424 }
425 else {
426 value += currentChar;
427 }
428 }
429
430 currentPos++;
431 } //end while
432
433 if (inQuotedString) {
434 // Remove extra , added at start
435 value = value.substring(0, value.length() - 1);
436 try {
437 line = this.readLine();
438 }
439 catch (IOException e) {
440 throw new SQLException(e.toString());
441 }
442 }
443 else {
444 fullLine = 1;
445 }
446
447 }// end while( fullLine == 0 )
448 String[] retVal = new String[values.size()];
449 values.toArray(retVal);
450
451 return retVal;
452 }
453
454
455 /***
456 *
457 * Parse csv line, whithout columnTypes.
458 *
459 * @param line
460 * @return array with values or column names.
461 * @throws SQLException
462 */
463 protected String[] parseCsvLineAsHeader(String line) throws SQLException
464 {
465 Vector values = new Vector();
466 // ArrayList columnTypesList = new ArrayList();
467 boolean inQuotedString = false;
468 String value = "";
469 String orgLine = line;
470 int currentPos = 0;
471 int fullLine = 0;
472
473 while (fullLine == 0) {
474 currentPos = 0;
475 line += separator;
476 while (currentPos < line.length()) {
477 char currentChar = line.charAt(currentPos);
478 if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
479 currentPos++;
480 inQuotedString = true;
481 continue;
482 }
483 if (currentChar == '"') {
484 char nextChar = line.charAt(currentPos + 1);
485 if (nextChar == '"') {
486 value += currentChar;
487 currentPos++;
488 }
489 else {
490 // if (!inQuotedString) {
491 // throw new SQLException("Unexpected '\"' in position " +
492 // currentPos + ". Line=" + orgLine);
493 // }
494 if (inQuotedString && nextChar == separator) {
495 // throw new SQLException("Expecting " + separator + " in position " +
496 // (currentPos + 1) + ". Line=" + orgLine);
497 // }
498 if (this.trimString)
499 value=value.trim();
500 if (value.endsWith("-"+CsvDriver.BINARY_TYPE)) {
501 value = value.substring(0,value.indexOf("-"+CsvDriver.BINARY_TYPE));
502 columnTypes1.put(value, CsvDriver.BINARY_TYPE);
503 colTypes.add(CsvDriver.BINARY_TYPE);
504 }
505 else{
506 columnTypes1.put(value, CsvDriver.VARCHAR_TYPE);
507 colTypes.add(CsvDriver.VARCHAR_TYPE);
508 }
509 values.add(value);
510 value = "";
511 inQuotedString = false;
512 currentPos++;
513 }
514 else{
515 if (nextChar != separator){
516 value += currentChar;
517 }
518 }
519 }
520 }
521 else {
522 if (currentChar == separator) {
523 if (inQuotedString) {
524 value += currentChar;
525 }
526 else {
527 if (this.trimString)
528 value=value.trim();
529 if (value.endsWith("-"+CsvDriver.BINARY_TYPE)) {
530 value = value.substring(0,value.indexOf("-"+CsvDriver.BINARY_TYPE));
531 columnTypes1.put(value, CsvDriver.BINARY_TYPE);
532 colTypes.add(CsvDriver.BINARY_TYPE);
533 }
534 else{
535 columnTypes1.put(value, CsvDriver.VARCHAR_TYPE);
536 colTypes.add(CsvDriver.VARCHAR_TYPE);
537 }
538 values.add(value);
539 value = "";
540 }
541 }
542 else {
543 value += currentChar;
544 }
545 }
546 currentPos++;
547 }
548 if (inQuotedString) {
549 value = value.substring(0, value.length() - 1);
550 try {
551 line = this.readLine();
552 }
553 catch (IOException e) {
554 throw new SQLException(e.toString());
555 }
556 }
557 else {
558 fullLine = 1;
559 }
560 }
561 String[] retVal = new String[values.size()];
562 values.copyInto(retVal);
563
564
565 return retVal;
566
567 }
568
569 private String formatString(String str) throws SQLException {
570 String retValue = str;
571 try {
572 //replace spec. characters
573 retValue = Utils.replaceAll(retValue,this.lineBreakEscape, "\n");
574 retValue = Utils.replaceAll(retValue,this.carriageReturnEscape, "\r");
575 }catch(Exception e) {
576 throw new SQLException("Error while reformat string ! : "+str);
577 }
578 return retValue;
579 }
580
581 private String readLine() throws IOException {
582 String retVal = "";
583 if(Utils.isUTF16(this.charset)) {
584 retVal = this.randomReader.readCsvLine();
585 } else {
586 retVal = bufReader.readLine();
587 }
588 return retVal;
589 }
590
591 private void closeInputs() throws IOException {
592 if(!Utils.isUTF16(this.charset)) {
593 if(this.bufReader != null)
594 bufReader.close();
595 } else {
596 if(this.randomReader != null)
597 randomReader.close();
598 }
599 }
600
601
602 }
603
This page was automatically generated by Maven