001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 019 package org.apache.commons.net.nntp; 020 021 /** 022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 025 * 026 * @author rwinston <rwinston@checkfree.com> 027 * 028 */ 029 030 import java.util.HashMap; 031 import java.util.Iterator; 032 033 public class Threader { 034 private ThreadContainer root; 035 private HashMap<String,ThreadContainer> idTable; 036 private int bogusIdCount = 0; 037 038 /** 039 * The main threader entry point - The client passes in an array of Threadable objects, and 040 * the Threader constructs a connected 'graph' of messages 041 * @param messages 042 * @return 043 */ 044 public Threadable thread(Threadable[] messages) { 045 if (messages == null) 046 return null; 047 048 idTable = new HashMap<String,ThreadContainer>(); 049 050 // walk through each Threadable element 051 for (int i = 0; i < messages.length; ++i) { 052 if (!messages[i].isDummy()) 053 buildContainer(messages[i]); 054 } 055 056 root = findRootSet(); 057 idTable.clear(); 058 idTable = null; 059 060 pruneEmptyContainers(root); 061 062 root.reverseChildren(); 063 gatherSubjects(); 064 065 if (root.next != null) 066 throw new RuntimeException("root node has a next:" + root); 067 068 for (ThreadContainer r = root.child; r != null; r = r.next) { 069 if (r.threadable == null) 070 r.threadable = r.child.threadable.makeDummy(); 071 } 072 073 Threadable result = (root.child == null ? null : root.child.threadable); 074 root.flush(); 075 root = null; 076 077 return result; 078 } 079 080 /** 081 * 082 * @param threadable 083 */ 084 private void buildContainer(Threadable threadable) { 085 String id = threadable.messageThreadId(); 086 ThreadContainer container = idTable.get(id); 087 088 // A ThreadContainer exists for this id already. This should be a forward reference, but may 089 // be a duplicate id, in which case we will need to generate a bogus placeholder id 090 if (container != null) { 091 if (container.threadable != null) { // oops! duplicate ids... 092 id = "<Bogus-id:" + (bogusIdCount++) + ">"; 093 container = null; 094 } else { 095 // The container just contained a forward reference to this message, so let's 096 // fill in the threadable field of the container with this message 097 container.threadable = threadable; 098 } 099 } 100 101 // No container exists for that message Id. Create one and insert it into the hash table. 102 if (container == null) { 103 container = new ThreadContainer(); 104 container.threadable = threadable; 105 idTable.put(id, container); 106 } 107 108 // Iterate through all of the references and create ThreadContainers for any references that 109 // don't have them. 110 ThreadContainer parentRef = null; 111 { 112 String[] references = threadable.messageThreadReferences(); 113 for (int i = 0; i < references.length; ++i) { 114 String refString = references[i]; 115 ThreadContainer ref = idTable.get(refString); 116 117 // if this id doesnt have a container, create one 118 if (ref == null) { 119 ref = new ThreadContainer(); 120 idTable.put(refString, ref); 121 } 122 123 // Link references together in the order they appear in the References: header, 124 // IF they dont have a have a parent already && 125 // IF it will not cause a circular reference 126 if ((parentRef != null) 127 && (ref.parent == null) 128 && (parentRef != ref) 129 && !(parentRef.findChild(ref))) { 130 // Link ref into the parent's child list 131 ref.parent = parentRef; 132 ref.next = parentRef.child; 133 parentRef.child = ref; 134 } 135 parentRef = ref; 136 } 137 } 138 139 // parentRef is now set to the container of the last element in the references field. make that 140 // be the parent of this container, unless doing so causes a circular reference 141 if (parentRef != null 142 && (parentRef == container || container.findChild(parentRef))) 143 parentRef = null; 144 145 // if it has a parent already, its because we saw this message in a References: field, and presumed 146 // a parent based on the other entries in that field. Now that we have the actual message, we can 147 // throw away the old parent and use this new one 148 if (container.parent != null) { 149 ThreadContainer rest, prev; 150 151 for (prev = null, rest = container.parent.child; 152 rest != null; 153 prev = rest, rest = rest.next) { 154 if (rest == container) 155 break; 156 } 157 158 if (rest == null) { 159 throw new RuntimeException( 160 "Didnt find " 161 + container 162 + " in parent" 163 + container.parent); 164 } 165 166 // Unlink this container from the parent's child list 167 if (prev == null) 168 container.parent.child = container.next; 169 else 170 prev.next = container.next; 171 172 container.next = null; 173 container.parent = null; 174 } 175 176 // If we have a parent, link container into the parents child list 177 if (parentRef != null) { 178 container.parent = parentRef; 179 container.next = parentRef.child; 180 parentRef.child = container; 181 } 182 } 183 184 /** 185 * Find the root set of all existing ThreadContainers 186 * @return root the ThreadContainer representing the root node 187 */ 188 private ThreadContainer findRootSet() { 189 ThreadContainer root = new ThreadContainer(); 190 Iterator<String> iter = idTable.keySet().iterator(); 191 192 while (iter.hasNext()) { 193 Object key = iter.next(); 194 ThreadContainer c = idTable.get(key); 195 if (c.parent == null) { 196 if (c.next != null) 197 throw new RuntimeException( 198 "c.next is " + c.next.toString()); 199 c.next = root.child; 200 root.child = c; 201 } 202 } 203 return root; 204 } 205 206 /** 207 * Delete any empty or dummy ThreadContainers 208 * @param parent 209 */ 210 private void pruneEmptyContainers(ThreadContainer parent) { 211 ThreadContainer container, prev, next; 212 for (prev = null, container = parent.child, next = container.next; 213 container != null; 214 prev = container, 215 container = next, 216 next = (container == null ? null : container.next)) { 217 218 // Is it empty and without any children? If so,delete it 219 if (container.threadable == null && container.child == null) { 220 if (prev == null) 221 parent.child = container.next; 222 else 223 prev.next = container.next; 224 225 // Set container to prev so that prev keeps its same value the next time through the loop 226 container = prev; 227 } 228 229 // Else if empty, with kids, and (not at root or only one kid) 230 else if ( 231 container.threadable == null 232 && container.child != null 233 && (container.parent != null 234 || container.child.next == null)) { 235 // We have an invalid/expired message with kids. Promote the kids to this level. 236 ThreadContainer tail; 237 ThreadContainer kids = container.child; 238 239 // Remove this container and replace with 'kids'. 240 if (prev == null) 241 parent.child = kids; 242 else 243 prev.next = kids; 244 245 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next 246 // i.e. splice kids into the list in place of container 247 for (tail = kids; tail.next != null; tail = tail.next) 248 tail.parent = container.parent; 249 250 tail.parent = container.parent; 251 tail.next = container.next; 252 253 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 254 // promoted items next time round 255 next = kids; 256 257 // Set container to prev so that prev keeps its same value the next time through the loop 258 container = prev; 259 } else if (container.child != null) { 260 // A real message , with kids 261 // Iterate over the children 262 pruneEmptyContainers(container); 263 } 264 } 265 } 266 267 /** 268 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 269 */ 270 private void gatherSubjects() { 271 272 int count = 0; 273 274 for (ThreadContainer c = root.child; c != null; c = c.next) 275 count++; 276 277 // TODO verify this will avoid rehashing 278 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9); 279 count = 0; 280 281 for (ThreadContainer c = root.child; c != null; c = c.next) { 282 Threadable threadable = c.threadable; 283 284 // No threadable? If so, it is a dummy node in the root set. 285 // Only root set members may be dummies, and they alway have at least 2 kids 286 // Take the first kid as representative of the subject 287 if (threadable == null) 288 threadable = c.child.threadable; 289 290 String subj = threadable.simplifiedSubject(); 291 292 if (subj == null || subj == "") 293 continue; 294 295 ThreadContainer old = subjectTable.get(subj); 296 297 // Add this container to the table iff: 298 // - There exists no container with this subject 299 // - or this is a dummy container and the old one is not - the dummy one is 300 // more interesting as a root, so put it in the table instead 301 // - The container in the table has a "Re:" version of this subject, and 302 // this container has a non-"Re:" version of this subject. The non-"Re:" version 303 // is the more interesting of the two. 304 if (old == null 305 || (c.threadable == null && old.threadable != null) 306 || (old.threadable != null 307 && old.threadable.subjectIsReply() 308 && c.threadable != null 309 && !c.threadable.subjectIsReply())) { 310 subjectTable.put(subj, c); 311 count++; 312 } 313 } 314 315 // If the table is empty, we're done 316 if (count == 0) 317 return; 318 319 // subjectTable is now populated with one entry for each subject which occurs in the 320 // root set. Iterate over the root set, and gather together the difference. 321 ThreadContainer prev, c, rest; 322 for (prev = null, c = root.child, rest = c.next; 323 c != null; 324 prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 325 Threadable threadable = c.threadable; 326 327 // is it a dummy node? 328 if (threadable == null) 329 threadable = c.child.threadable; 330 331 String subj = threadable.simplifiedSubject(); 332 333 // Dont thread together all subjectless messages 334 if (subj == null || subj == "") 335 continue; 336 337 ThreadContainer old = subjectTable.get(subj); 338 339 if (old == c) // That's us 340 continue; 341 342 // We have now found another container in the root set with the same subject 343 // Remove the "second" message from the root set 344 if (prev == null) 345 root.child = c.next; 346 else 347 prev.next = c.next; 348 c.next = null; 349 350 if (old.threadable == null && c.threadable == null) { 351 // both dummies - merge them 352 ThreadContainer tail; 353 for (tail = old.child; 354 tail != null && tail.next != null; 355 tail = tail.next); 356 357 tail.next = c.child; 358 359 for (tail = c.child; tail != null; tail = tail.next) 360 tail.parent = old; 361 362 c.child = null; 363 } else if ( 364 old.threadable == null 365 || (c.threadable != null 366 && c.threadable.subjectIsReply() 367 && !old.threadable.subjectIsReply())) { 368 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 369 c.parent = old; 370 c.next = old.child; 371 old.child = c; 372 } else { 373 // else make the old and new messages be children of a new dummy container. 374 // We create a new container object for old.msg and empty the old container 375 ThreadContainer newc = new ThreadContainer(); 376 newc.threadable = old.threadable; 377 newc.child = old.child; 378 379 for (ThreadContainer tail = newc.child; 380 tail != null; 381 tail = tail.next) 382 tail.parent = newc; 383 384 old.threadable = null; 385 old.child = null; 386 387 c.parent = old; 388 newc.parent = old; 389 390 // Old is now a dummy- give it 2 kids , c and newc 391 old.child = c; 392 c.next = newc; 393 } 394 // We've done a merge, so keep the same prev 395 c = prev; 396 } 397 398 subjectTable.clear(); 399 subjectTable = null; 400 401 } 402 } 403 404 /** 405 * A placeholder utility class, used for constructing a tree of Threadables 406 * Originall implementation by Jamie Zawinski. 407 * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a> 408 * Threadable objects 409 * @author Rory Winston <rwinston@checkfree.com> 410 */ 411 class ThreadContainer { 412 Threadable threadable; 413 ThreadContainer parent; 414 ThreadContainer prev; 415 ThreadContainer next; 416 ThreadContainer child; 417 418 /** 419 * 420 * @param container 421 * @return true if child is under self's tree. Detects circular references 422 */ 423 boolean findChild(ThreadContainer target) { 424 if (child == null) 425 return false; 426 427 else if (child == target) 428 return true; 429 else 430 return child.findChild(target); 431 } 432 433 // Copy the ThreadContainer tree structure down into the underlying Threadable objects 434 // (Make the Threadable tree look like the ThreadContainer tree) 435 void flush() { 436 if (parent != null && threadable == null) 437 throw new RuntimeException("no threadable in " + this.toString()); 438 439 parent = null; 440 441 if (threadable != null) 442 threadable.setChild(child == null ? null : child.threadable); 443 444 if (child != null) { 445 child.flush(); 446 child = null; 447 } 448 449 if (threadable != null) 450 threadable.setNext(next == null ? null : next.threadable); 451 452 if (next != null) { 453 next.flush(); 454 next = null; 455 } 456 457 threadable = null; 458 } 459 460 /** 461 * Reverse the entire set of children 462 * 463 */ 464 void reverseChildren() { 465 if (child != null) { 466 ThreadContainer kid, prev, rest; 467 for (prev = null, kid = child, rest = kid.next; 468 kid != null; 469 prev = kid, 470 kid = rest, 471 rest = (rest == null ? null : rest.next)) 472 kid.next = prev; 473 474 child = prev; 475 476 // Do it for the kids 477 for (kid = child; kid != null; kid = kid.next) 478 kid.reverseChildren(); 479 } 480 } 481 }