View Javadoc

1   package sharin.csv.parser;
2   
3   class BasicCsvAutomaton {
4   
5       private enum State {
6           INITIAL, NEW_RECORD, NEW_RECORD_CR, SEPARATED, SEPARATED_CR, VALUE, VALUE_CR, QUOTED, ESCAPED, ESCAPED_CR, FINAL
7       };
8   
9       private final CsvHandler handler;
10  
11      private final char separator;
12  
13      private State state;
14  
15      public BasicCsvAutomaton(CsvHandler handler, char separator) {
16          this.handler = handler;
17          this.separator = separator;
18          state = State.INITIAL;
19      }
20  
21      public void put(int ch) {
22  
23          switch (state) {
24  
25          case INITIAL:
26              putOnInitial(ch);
27              break;
28  
29          case NEW_RECORD:
30              putOnNewRecord(ch);
31              break;
32  
33          case NEW_RECORD_CR:
34              putOnNewRecordCR(ch);
35              break;
36  
37          case VALUE:
38              putOnValue(ch);
39              break;
40  
41          case VALUE_CR:
42              putOnValueCR(ch);
43              break;
44  
45          case SEPARATED:
46              putOnSeparated(ch);
47              break;
48  
49          case SEPARATED_CR:
50              putOnSeparatedCR(ch);
51              break;
52  
53          case QUOTED:
54              putOnQuoted(ch);
55              break;
56  
57          case ESCAPED:
58              putOnEscaped(ch);
59              break;
60  
61          case ESCAPED_CR:
62              putOnEscapedCR(ch);
63              break;
64  
65          default:
66              /* do nothing */
67              break;
68          }
69      }
70  
71      private void putOnInitial(int ch) {
72          handler.startDocument();
73          putOnNewRecord(ch);
74      }
75  
76      private void putOnNewRecord(int ch) {
77  
78          if (ch == separator) {
79              handler.startRecord();
80              handler.startValue();
81              handler.endValue();
82              state = State.SEPARATED;
83  
84          } else if (ch == '"') {
85              handler.startRecord();
86              handler.startValue();
87              state = State.QUOTED;
88  
89          } else if (ch == '\r') {
90              handler.startRecord();
91              handler.endRecord();
92              state = State.NEW_RECORD_CR;
93  
94          } else if (ch == '\n') {
95              handler.startRecord();
96              handler.endRecord();
97              state = State.NEW_RECORD;
98  
99          } else if (ch == -1) {
100             handler.endDocument();
101             state = State.FINAL;
102 
103         } else {
104             handler.startRecord();
105             handler.startValue();
106             handler.character((char) ch);
107             state = State.VALUE;
108         }
109     }
110 
111     private void putOnNewRecordCR(int ch) {
112 
113         if (ch == '\n') {
114             state = State.NEW_RECORD;
115 
116         } else {
117             putOnNewRecord(ch);
118         }
119     }
120 
121     private void putOnValue(int ch) {
122 
123         if (ch == separator) {
124             handler.endValue();
125             state = State.SEPARATED;
126 
127         } else if (ch == '"') {
128             handler.character((char) ch);
129             state = State.VALUE;
130 
131         } else if (ch == '\r') {
132             handler.endValue();
133             handler.endRecord();
134             state = State.VALUE_CR;
135 
136         } else if (ch == '\n') {
137             handler.endValue();
138             handler.endRecord();
139             state = State.NEW_RECORD;
140 
141         } else if (ch == -1) {
142             handler.endValue();
143             handler.endRecord();
144             handler.endDocument();
145             state = State.FINAL;
146 
147         } else {
148             handler.character((char) ch);
149             state = State.VALUE;
150         }
151     }
152 
153     private void putOnValueCR(int ch) {
154 
155         if (ch == '\n') {
156             state = State.NEW_RECORD;
157 
158         } else {
159             putOnNewRecord(ch);
160         }
161     }
162 
163     private void putOnSeparated(int ch) {
164 
165         if (ch == separator) {
166             handler.startValue();
167             handler.endValue();
168             state = State.SEPARATED;
169 
170         } else if (ch == '"') {
171             handler.startValue();
172             state = State.QUOTED;
173 
174         } else if (ch == '\r') {
175             handler.startValue();
176             handler.endValue();
177             handler.endRecord();
178             state = State.SEPARATED_CR;
179 
180         } else if (ch == '\n') {
181             handler.startValue();
182             handler.endValue();
183             handler.endRecord();
184             state = State.NEW_RECORD;
185 
186         } else if (ch == -1) {
187             handler.startValue();
188             handler.endValue();
189             handler.endRecord();
190             handler.endDocument();
191             state = State.FINAL;
192 
193         } else {
194             handler.startValue();
195             handler.character((char) ch);
196             state = State.VALUE;
197         }
198     }
199 
200     private void putOnSeparatedCR(int ch) {
201 
202         if (ch == '\n') {
203             state = State.NEW_RECORD;
204 
205         } else {
206             putOnNewRecord(ch);
207         }
208     }
209 
210     private void putOnQuoted(int ch) {
211 
212         if (ch == separator) {
213             handler.character((char) ch);
214             state = State.QUOTED;
215 
216         } else if (ch == '"') {
217             state = State.ESCAPED;
218 
219         } else if (ch == '\r') {
220             handler.character((char) ch);
221             state = State.QUOTED;
222 
223         } else if (ch == '\n') {
224             handler.character((char) ch);
225             state = State.QUOTED;
226 
227         } else if (ch == -1) {
228             handler.endValue();
229             handler.endRecord();
230             handler.endDocument();
231             state = State.FINAL;
232 
233         } else {
234             handler.character((char) ch);
235             state = State.QUOTED;
236         }
237     }
238 
239     private void putOnEscaped(int ch) {
240 
241         if (ch == separator) {
242             handler.endValue();
243             state = State.SEPARATED;
244 
245         } else if (ch == '"') {
246             handler.character((char) ch);
247             state = State.QUOTED;
248 
249         } else if (ch == '\r') {
250             handler.endValue();
251             handler.endRecord();
252             state = State.ESCAPED_CR;
253 
254         } else if (ch == '\n') {
255             handler.endValue();
256             handler.endRecord();
257             state = State.NEW_RECORD;
258 
259         } else if (ch == -1) {
260             handler.endValue();
261             handler.endRecord();
262             handler.endDocument();
263             state = State.FINAL;
264 
265         } else {
266             handler.character((char) ch);
267             state = State.VALUE;
268         }
269     }
270 
271     private void putOnEscapedCR(int ch) {
272 
273         if (ch == '\n') {
274             state = State.NEW_RECORD;
275 
276         } else {
277             putOnNewRecord(ch);
278         }
279     }
280 }