1 package sharin.csv.parser;
2
3 class BasicCsvAutomaton {
4
5 private enum State {
6 INITIAL, NEW_RECORD, NEW_RECORD_CR, SEPARATED, SEPARATED_CR, VALUE, VALUE_CR, QUOTED, ESCAPED, ESCAPED_CR, FINAL
7 };
8
9 private final CsvHandler handler;
10
11 private final char separator;
12
13 private State state;
14
15 public BasicCsvAutomaton(CsvHandler handler, char separator) {
16 this.handler = handler;
17 this.separator = separator;
18 state = State.INITIAL;
19 }
20
21 public void put(int ch) {
22
23 switch (state) {
24
25 case INITIAL:
26 putOnInitial(ch);
27 break;
28
29 case NEW_RECORD:
30 putOnNewRecord(ch);
31 break;
32
33 case NEW_RECORD_CR:
34 putOnNewRecordCR(ch);
35 break;
36
37 case VALUE:
38 putOnValue(ch);
39 break;
40
41 case VALUE_CR:
42 putOnValueCR(ch);
43 break;
44
45 case SEPARATED:
46 putOnSeparated(ch);
47 break;
48
49 case SEPARATED_CR:
50 putOnSeparatedCR(ch);
51 break;
52
53 case QUOTED:
54 putOnQuoted(ch);
55 break;
56
57 case ESCAPED:
58 putOnEscaped(ch);
59 break;
60
61 case ESCAPED_CR:
62 putOnEscapedCR(ch);
63 break;
64
65 default:
66
67 break;
68 }
69 }
70
71 private void putOnInitial(int ch) {
72 handler.startDocument();
73 putOnNewRecord(ch);
74 }
75
76 private void putOnNewRecord(int ch) {
77
78 if (ch == separator) {
79 handler.startRecord();
80 handler.startValue();
81 handler.endValue();
82 state = State.SEPARATED;
83
84 } else if (ch == '"') {
85 handler.startRecord();
86 handler.startValue();
87 state = State.QUOTED;
88
89 } else if (ch == '\r') {
90 handler.startRecord();
91 handler.endRecord();
92 state = State.NEW_RECORD_CR;
93
94 } else if (ch == '\n') {
95 handler.startRecord();
96 handler.endRecord();
97 state = State.NEW_RECORD;
98
99 } else if (ch == -1) {
100 handler.endDocument();
101 state = State.FINAL;
102
103 } else {
104 handler.startRecord();
105 handler.startValue();
106 handler.character((char) ch);
107 state = State.VALUE;
108 }
109 }
110
111 private void putOnNewRecordCR(int ch) {
112
113 if (ch == '\n') {
114 state = State.NEW_RECORD;
115
116 } else {
117 putOnNewRecord(ch);
118 }
119 }
120
121 private void putOnValue(int ch) {
122
123 if (ch == separator) {
124 handler.endValue();
125 state = State.SEPARATED;
126
127 } else if (ch == '"') {
128 handler.character((char) ch);
129 state = State.VALUE;
130
131 } else if (ch == '\r') {
132 handler.endValue();
133 handler.endRecord();
134 state = State.VALUE_CR;
135
136 } else if (ch == '\n') {
137 handler.endValue();
138 handler.endRecord();
139 state = State.NEW_RECORD;
140
141 } else if (ch == -1) {
142 handler.endValue();
143 handler.endRecord();
144 handler.endDocument();
145 state = State.FINAL;
146
147 } else {
148 handler.character((char) ch);
149 state = State.VALUE;
150 }
151 }
152
153 private void putOnValueCR(int ch) {
154
155 if (ch == '\n') {
156 state = State.NEW_RECORD;
157
158 } else {
159 putOnNewRecord(ch);
160 }
161 }
162
163 private void putOnSeparated(int ch) {
164
165 if (ch == separator) {
166 handler.startValue();
167 handler.endValue();
168 state = State.SEPARATED;
169
170 } else if (ch == '"') {
171 handler.startValue();
172 state = State.QUOTED;
173
174 } else if (ch == '\r') {
175 handler.startValue();
176 handler.endValue();
177 handler.endRecord();
178 state = State.SEPARATED_CR;
179
180 } else if (ch == '\n') {
181 handler.startValue();
182 handler.endValue();
183 handler.endRecord();
184 state = State.NEW_RECORD;
185
186 } else if (ch == -1) {
187 handler.startValue();
188 handler.endValue();
189 handler.endRecord();
190 handler.endDocument();
191 state = State.FINAL;
192
193 } else {
194 handler.startValue();
195 handler.character((char) ch);
196 state = State.VALUE;
197 }
198 }
199
200 private void putOnSeparatedCR(int ch) {
201
202 if (ch == '\n') {
203 state = State.NEW_RECORD;
204
205 } else {
206 putOnNewRecord(ch);
207 }
208 }
209
210 private void putOnQuoted(int ch) {
211
212 if (ch == separator) {
213 handler.character((char) ch);
214 state = State.QUOTED;
215
216 } else if (ch == '"') {
217 state = State.ESCAPED;
218
219 } else if (ch == '\r') {
220 handler.character((char) ch);
221 state = State.QUOTED;
222
223 } else if (ch == '\n') {
224 handler.character((char) ch);
225 state = State.QUOTED;
226
227 } else if (ch == -1) {
228 handler.endValue();
229 handler.endRecord();
230 handler.endDocument();
231 state = State.FINAL;
232
233 } else {
234 handler.character((char) ch);
235 state = State.QUOTED;
236 }
237 }
238
239 private void putOnEscaped(int ch) {
240
241 if (ch == separator) {
242 handler.endValue();
243 state = State.SEPARATED;
244
245 } else if (ch == '"') {
246 handler.character((char) ch);
247 state = State.QUOTED;
248
249 } else if (ch == '\r') {
250 handler.endValue();
251 handler.endRecord();
252 state = State.ESCAPED_CR;
253
254 } else if (ch == '\n') {
255 handler.endValue();
256 handler.endRecord();
257 state = State.NEW_RECORD;
258
259 } else if (ch == -1) {
260 handler.endValue();
261 handler.endRecord();
262 handler.endDocument();
263 state = State.FINAL;
264
265 } else {
266 handler.character((char) ch);
267 state = State.VALUE;
268 }
269 }
270
271 private void putOnEscapedCR(int ch) {
272
273 if (ch == '\n') {
274 state = State.NEW_RECORD;
275
276 } else {
277 putOnNewRecord(ch);
278 }
279 }
280 }