001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    
019    package org.apache.commons.net.nntp;
020    
021    /**
022     * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023     * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024     * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025     *  
026     * @author rwinston <rwinston@checkfree.com>
027     *
028     */
029    
030    import java.util.HashMap;
031    import java.util.Iterator;
032    
033    public class Threader {
034        private ThreadContainer root;
035        private HashMap<String,ThreadContainer> idTable;
036        private int bogusIdCount = 0;
037    
038        /**
039         * The main threader entry point - The client passes in an array of Threadable objects, and 
040         * the Threader constructs a connected 'graph' of messages
041         * @param messages
042         * @return
043         */
044        public Threadable thread(Threadable[] messages) {
045            if (messages == null)
046                return null;
047    
048            idTable = new HashMap<String,ThreadContainer>();
049    
050            // walk through each Threadable element
051            for (int i = 0; i < messages.length; ++i) {
052                if (!messages[i].isDummy())
053                    buildContainer(messages[i]);
054            }
055    
056            root = findRootSet();
057            idTable.clear();
058            idTable = null;
059    
060            pruneEmptyContainers(root);
061    
062            root.reverseChildren();
063            gatherSubjects();
064    
065            if (root.next != null)
066                throw new RuntimeException("root node has a next:" + root);
067    
068            for (ThreadContainer r = root.child; r != null; r = r.next) {
069                if (r.threadable == null)
070                    r.threadable = r.child.threadable.makeDummy();
071            }
072    
073            Threadable result = (root.child == null ? null : root.child.threadable);
074            root.flush();
075            root = null;
076    
077            return result;
078        }
079    
080        /**
081         * 
082         * @param threadable
083         */
084        private void buildContainer(Threadable threadable) {
085            String id = threadable.messageThreadId();
086            ThreadContainer container = idTable.get(id);
087    
088            // A ThreadContainer exists for this id already. This should be a forward reference, but may 
089            // be a duplicate id, in which case we will need to generate a bogus placeholder id
090            if (container != null) {
091                if (container.threadable != null) { // oops! duplicate ids...
092                    id = "<Bogus-id:" + (bogusIdCount++) + ">";
093                    container = null;
094                } else {
095                    // The container just contained a forward reference to this message, so let's
096                    // fill in the threadable field of the container with this message
097                    container.threadable = threadable;
098                }
099            }
100    
101            // No container exists for that message Id. Create one and insert it into the hash table.
102            if (container == null) {
103                container = new ThreadContainer();
104                container.threadable = threadable;
105                idTable.put(id, container);
106            }
107    
108            // Iterate through all of the references and create ThreadContainers for any references that
109            // don't have them.
110            ThreadContainer parentRef = null;
111            {
112                String[] references = threadable.messageThreadReferences();
113                for (int i = 0; i < references.length; ++i) {
114                    String refString = references[i];
115                    ThreadContainer ref = idTable.get(refString);
116    
117                    // if this id doesnt have a container, create one
118                    if (ref == null) {
119                        ref = new ThreadContainer();
120                        idTable.put(refString, ref);
121                    }
122    
123                    // Link references together in the order they appear in the References: header,
124                    // IF they dont have a have a parent already &&
125                    // IF it will not cause a circular reference
126                    if ((parentRef != null)
127                        && (ref.parent == null)
128                        && (parentRef != ref)
129                        && !(parentRef.findChild(ref))) {
130                        // Link ref into the parent's child list
131                        ref.parent = parentRef;
132                        ref.next = parentRef.child;
133                        parentRef.child = ref;
134                    }
135                    parentRef = ref;
136                }
137            }
138    
139            // parentRef is now set to the container of the last element in the references field. make that 
140            // be the parent of this container, unless doing so causes a circular reference
141            if (parentRef != null
142                && (parentRef == container || container.findChild(parentRef)))
143                parentRef = null;
144    
145            // if it has a parent already, its because we saw this message in a References: field, and presumed
146            // a parent based on the other entries in that field. Now that we have the actual message, we can
147            // throw away the old parent and use this new one
148            if (container.parent != null) {
149                ThreadContainer rest, prev;
150    
151                for (prev = null, rest = container.parent.child;
152                    rest != null;
153                    prev = rest, rest = rest.next) {
154                    if (rest == container)
155                        break;
156                }
157    
158                if (rest == null) {
159                    throw new RuntimeException(
160                        "Didnt find "
161                            + container
162                            + " in parent"
163                            + container.parent);
164                }
165    
166                // Unlink this container from the parent's child list
167                if (prev == null)
168                    container.parent.child = container.next;
169                else
170                    prev.next = container.next;
171    
172                container.next = null;
173                container.parent = null;
174            }
175    
176            // If we have a parent, link container into the parents child list
177            if (parentRef != null) {
178                container.parent = parentRef;
179                container.next = parentRef.child;
180                parentRef.child = container;
181            }
182        }
183    
184        /**
185         * Find the root set of all existing ThreadContainers
186         * @return root the ThreadContainer representing the root node
187         */
188        private ThreadContainer findRootSet() {
189            ThreadContainer root = new ThreadContainer();
190            Iterator<String> iter = idTable.keySet().iterator();
191    
192            while (iter.hasNext()) {
193                Object key = iter.next();
194                ThreadContainer c = idTable.get(key);
195                if (c.parent == null) {
196                    if (c.next != null)
197                        throw new RuntimeException(
198                            "c.next is " + c.next.toString());
199                    c.next = root.child;
200                    root.child = c;
201                }
202            }
203            return root;
204        }
205    
206        /**
207         * Delete any empty or dummy ThreadContainers
208         * @param parent
209         */
210        private void pruneEmptyContainers(ThreadContainer parent) {
211            ThreadContainer container, prev, next;
212            for (prev = null, container = parent.child, next = container.next;
213                container != null;
214                prev = container,
215                    container = next,
216                    next = (container == null ? null : container.next)) {
217    
218                // Is it empty and without any children? If so,delete it 
219                if (container.threadable == null && container.child == null) {
220                    if (prev == null)
221                        parent.child = container.next;
222                    else
223                        prev.next = container.next;
224    
225                    // Set container to prev so that prev keeps its same value the next time through the loop
226                    container = prev;
227                }
228    
229                // Else if empty, with kids, and (not at root or only one kid)
230                else if (
231                    container.threadable == null
232                        && container.child != null
233                        && (container.parent != null
234                            || container.child.next == null)) {
235                    // We have an invalid/expired message with kids. Promote the kids to this level. 
236                    ThreadContainer tail;
237                    ThreadContainer kids = container.child;
238    
239                    // Remove this container and replace with 'kids'.
240                    if (prev == null)
241                        parent.child = kids;
242                    else
243                        prev.next = kids;
244    
245                    // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
246                    // i.e. splice kids into the list in place of container
247                    for (tail = kids; tail.next != null; tail = tail.next)
248                        tail.parent = container.parent;
249    
250                    tail.parent = container.parent;
251                    tail.next = container.next;
252    
253                    // next currently points to the item after the inserted items in the chain - reset that so we process the newly
254                    // promoted items next time round
255                    next = kids;
256    
257                    // Set container to prev so that prev keeps its same value the next time through the loop
258                    container = prev;
259                } else if (container.child != null) {
260                    // A real message , with kids
261                    // Iterate over the children
262                    pruneEmptyContainers(container);
263                }
264            }
265        }
266    
267        /**
268         *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 
269         */
270        private void gatherSubjects() {
271    
272            int count = 0;
273    
274            for (ThreadContainer c = root.child; c != null; c = c.next)
275                count++;
276    
277            // TODO verify this will avoid rehashing
278            HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
279            count = 0;
280    
281            for (ThreadContainer c = root.child; c != null; c = c.next) {
282                Threadable threadable = c.threadable;
283    
284                // No threadable? If so, it is a dummy node in the root set.
285                // Only root set members may be dummies, and they alway have at least 2 kids
286                // Take the first kid as representative of the subject
287                if (threadable == null)
288                    threadable = c.child.threadable;
289    
290                String subj = threadable.simplifiedSubject();
291    
292                if (subj == null || subj == "")
293                    continue;
294    
295                ThreadContainer old = subjectTable.get(subj);
296    
297                // Add this container to the table iff:
298                // - There exists no container with this subject
299                // - or this is a dummy container and the old one is not - the dummy one is
300                // more interesting as a root, so put it in the table instead
301                // - The container in the table has a "Re:" version of this subject, and 
302                // this container has a non-"Re:" version of this subject. The non-"Re:" version
303                // is the more interesting of the two.
304                if (old == null
305                    || (c.threadable == null && old.threadable != null)
306                    || (old.threadable != null
307                        && old.threadable.subjectIsReply()
308                        && c.threadable != null
309                        && !c.threadable.subjectIsReply())) {
310                    subjectTable.put(subj, c);
311                    count++;
312                }
313            }
314    
315            // If the table is empty, we're done
316            if (count == 0)
317                return;
318    
319            // subjectTable is now populated with one entry for each subject which occurs in the 
320            // root set. Iterate over the root set, and gather together the difference.
321            ThreadContainer prev, c, rest;
322            for (prev = null, c = root.child, rest = c.next;
323                c != null;
324                prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
325                Threadable threadable = c.threadable;
326    
327                // is it a dummy node?
328                if (threadable == null)
329                    threadable = c.child.threadable;
330    
331                String subj = threadable.simplifiedSubject();
332    
333                // Dont thread together all subjectless messages
334                if (subj == null || subj == "")
335                    continue;
336    
337                ThreadContainer old = subjectTable.get(subj);
338    
339                if (old == c) // That's us
340                    continue;
341    
342                // We have now found another container in the root set with the same subject
343                // Remove the "second" message from the root set
344                if (prev == null)
345                    root.child = c.next;
346                else
347                    prev.next = c.next;
348                c.next = null;
349    
350                if (old.threadable == null && c.threadable == null) {
351                    // both dummies - merge them
352                    ThreadContainer tail;
353                    for (tail = old.child;
354                        tail != null && tail.next != null;
355                        tail = tail.next);
356    
357                    tail.next = c.child;
358    
359                    for (tail = c.child; tail != null; tail = tail.next)
360                        tail.parent = old;
361    
362                    c.child = null;
363                } else if (
364                    old.threadable == null
365                        || (c.threadable != null
366                            && c.threadable.subjectIsReply()
367                            && !old.threadable.subjectIsReply())) {
368                    // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
369                    c.parent = old;
370                    c.next = old.child;
371                    old.child = c;
372                } else {
373                    // else make the old and new messages be children of a new dummy container.
374                    // We create a new container object for old.msg and empty the old container
375                    ThreadContainer newc = new ThreadContainer();
376                    newc.threadable = old.threadable;
377                    newc.child = old.child;
378    
379                    for (ThreadContainer tail = newc.child;
380                        tail != null;
381                        tail = tail.next)
382                        tail.parent = newc;
383    
384                    old.threadable = null;
385                    old.child = null;
386    
387                    c.parent = old;
388                    newc.parent = old;
389    
390                    // Old is now a dummy- give it 2 kids , c and newc
391                    old.child = c;
392                    c.next = newc;
393                }
394                // We've done a merge, so keep the same prev
395                c = prev;
396            }
397    
398            subjectTable.clear();
399            subjectTable = null;
400    
401        }
402    }
403    
404    /**
405     * A placeholder utility class, used for constructing a tree of Threadables
406     * Originall implementation by Jamie Zawinski. 
407     * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
408     * Threadable objects
409     * @author Rory Winston <rwinston@checkfree.com>
410     */
411    class ThreadContainer {
412        Threadable threadable;
413        ThreadContainer parent;
414        ThreadContainer prev;
415        ThreadContainer next;
416        ThreadContainer child;
417    
418        /**
419         * 
420         * @param container
421         * @return true if child is under self's tree. Detects circular references
422         */
423        boolean findChild(ThreadContainer target) {
424            if (child == null)
425                return false;
426    
427            else if (child == target)
428                return true;
429            else
430                return child.findChild(target);
431        }
432    
433        // Copy the ThreadContainer tree structure down into the underlying Threadable objects
434        // (Make the Threadable tree look like the ThreadContainer tree)
435        void flush() {
436            if (parent != null && threadable == null)
437                throw new RuntimeException("no threadable in " + this.toString());
438    
439            parent = null;
440    
441            if (threadable != null)
442                threadable.setChild(child == null ? null : child.threadable);
443    
444            if (child != null) {
445                child.flush();
446                child = null;
447            }
448    
449            if (threadable != null)
450                threadable.setNext(next == null ? null : next.threadable);
451    
452            if (next != null) {
453                next.flush();
454                next = null;
455            }
456    
457            threadable = null;
458        }
459    
460        /**
461         * Reverse the entire set of children
462         *
463         */
464        void reverseChildren() {
465            if (child != null) {
466                ThreadContainer kid, prev, rest;
467                for (prev = null, kid = child, rest = kid.next;
468                    kid != null;
469                    prev = kid,
470                        kid = rest,
471                        rest = (rest == null ? null : rest.next))
472                    kid.next = prev;
473    
474                child = prev;
475    
476                // Do it for the kids 
477                for (kid = child; kid != null; kid = kid.next)
478                    kid.reverseChildren();
479            }
480        }
481    }