[2374] in Kerberos-V5-bugs

home help back first fref pref prev next nref lref last post

pending/132: Re: Replay cache hell

daemon@ATHENA.MIT.EDU (Jonathan I. Kamens)
Wed Oct 23 13:46:32 1996

Resent-From: gnats@rt-11.MIT.EDU (GNATS Management)
Resent-To: gnats-admin@rt-11.MIT.EDU
Resent-Reply-To: krb5-bugs@MIT.EDU, jik@cam.ov.com (Jonathan I. Kamens)
Date: Wed, 23 Oct 1996 13:40:23 -0400
From: jik@cam.ov.com (Jonathan I. Kamens)
To: kenh@cmf.nrl.navy.mil (Ken Hornstein)
Cc: kerberos@MIT.EDU, krb5-bugs@MIT.EDU
In-Reply-To: <199610231628.MAA01236@ginger.cmf.nrl.navy.mil>


>Number:         132
>Category:       pending
>Synopsis:       Re: Replay cache hell
>Confidential:   yes
>Severity:       serious
>Priority:       medium
>Responsible:    gnats-admin
>State:          open
>Class:          sw-bug
>Submitter-Id:   unknown
>Arrival-Date:   Wed Oct e 13:46:00 EDT 1996
>Last-Modified:
>Originator:
>Organization:
>Release:
>Environment:
>Description:
>How-To-Repeat:
>Fix:
>Audit-Trail:
>Unformatted:
Here's the description of this bug in our (OV's) bug database:

>	krb5_recvauth relies on the replay cache code to automatically
>figure out when there are too many expired entires in a replay cache
>and expunge them.
>
>	The only time that the replay cache code does an automatic
>expunge is when it encounters more than 30 cache entries in a single
>hash bucket.  This doesn't happen until the replay cache on disk is
>very, very large and there are many, many expired entries in it.  The
>result of this is that applications which use krb5_recvauth will start
>out running at an acceptable speed, but will gradually get slower and
>slower as they are used more, because their replay caches grow and
>aren't expunged and therefore take a long time to recover each time the
>application starts up.  I observed this behavior with the new popper
>which I installed on pad-thai.
>
>	The fix for this is to also do an automatic expunge when
>recovering the cache, if more than 30 expired entries are encountered
>read in.

Here's the patch (against krb5 beta 4; I don't know if it'll still
apply cleanly to beta 7, but it should be pretty close) with the
changes I made to fix the problem:

---cut here---
--- rc_dfl.c@@/main/1	Mon May 22 23:05:29 1995
+++ rc_dfl.c@@/main/2	Thu Apr 18 03:30:57 1996
@@ -54,8 +54,12 @@
 of live krb5_donot_replays by EXCESSREPS. With the defaults here, a typical
 cache might build up some 10K of expired krb5_donot_replays before an automatic
 expunge, with the waste basically independent of the number of stores per
-minute. */
+minute.
 
+The rcache will also automatically be expunged when it encounters more
+than EXCESSREPS expired entries when recovering a cache in
+dfl_recover. */
+
 static int hash(rep, hsize)
 krb5_donot_replay *rep;
 int hsize;
@@ -109,6 +113,7 @@
 #ifndef NOIOSTUFF
   krb5_rc_iostuff d;
 #endif
+  char recovering;
  }
 ;
 
@@ -270,6 +275,7 @@
 #ifndef NOIOSTUFF
     t->d.fd = -1;
 #endif
+    t->recovering = 0;
     return 0;
     
 cleanup:
@@ -374,13 +380,16 @@
 #else
 
     struct dfl_data *t = (struct dfl_data *)id->data;
-    krb5_donot_replay *rep;
+    krb5_donot_replay *rep = 0;
     krb5_error_code retval;
     int max_size;
+    int expired_entries = 0;
 
     if (retval = krb5_rc_io_open(&t->d,t->name))
 	return retval;
- 
+
+    t->recovering = 1;
+
     max_size = krb5_rc_io_size(&t->d);
  
     rep = NULL;
@@ -391,18 +400,17 @@
 
     /* now read in each auth_replay and insert into table */
     for (;;) {
-	rep = NULL;
 	if (krb5_rc_io_mark(&t->d)) {
 	    retval = KRB5_RC_IO;
 	    goto io_fail;
 	}
 	
-	if (!(rep = (krb5_donot_replay *) malloc(sizeof(krb5_donot_replay)))) {
+	if (!(rep ||
+	      (rep = (krb5_donot_replay *) malloc(sizeof(krb5_donot_replay))))) {
 	    retval = KRB5_RC_MALLOC;
 	    goto io_fail;
 	}
-	rep->client = NULL;
-	rep->server = NULL;
+	memset(rep, 0, sizeof(*rep));
 	
 	retval = krb5_rc_io_fetch (t, rep, max_size);
 
@@ -414,6 +422,7 @@
 	
 	if (alive(rep,t->lifespan) == CMP_EXPIRED) {
 	    krb5_rc_free_entry(&rep);
+	    expired_entries++;
 	    continue;
 	}
 
@@ -427,7 +436,6 @@
 	 */
 	FREE(rep->server);
 	FREE(rep->client);
-	rep = NULL;
     }
     retval = 0;
     krb5_rc_io_unmark(&t->d);
@@ -439,6 +447,9 @@
     krb5_rc_free_entry(&rep);
     if (retval)
 	krb5_rc_io_close(&t->d);
+    else if (expired_entries > EXCESSREPS)
+      retval = krb5_rc_dfl_expunge(id);
+    t->recovering = 0;
     return retval;
     
 #endif
@@ -450,7 +461,7 @@
 {
     int clientlen, serverlen, len;
     char *buf, *ptr;
-    unsigned long ret;
+    krb5_error_code ret;
 
     clientlen = strlen (rep->client) + 1;
     serverlen = strlen (rep->server) + 1;
@@ -476,7 +487,7 @@
 krb5_rcache id;
 krb5_donot_replay *rep;
 {
-    unsigned long ret;
+    krb5_error_code ret;
     struct dfl_data *t = (struct dfl_data *)id->data;
 
     switch(store(id,rep)) {
@@ -543,17 +554,20 @@
     krb5_rcache tmp;
     krb5_deltat lifespan = t->lifespan;  /* save original lifespan */
 
-    name = t->name;
-    t->name = 0;		/* Clear name so it isn't freed */
-    (void) krb5_rc_dfl_close_no_free(id);
-    retval = krb5_rc_dfl_resolve(id, name);
-    free(name);
-    if (retval)
+    if (! t->recovering) {
+      name = t->name;
+      t->name = 0;		/* Clear name so it isn't freed */
+      (void) krb5_rc_dfl_close_no_free(id);
+      retval = krb5_rc_dfl_resolve(id, name);
+      free(name);
+      if (retval)
 	return retval;
-    retval = krb5_rc_dfl_recover(id);
-    if (retval)
+      retval = krb5_rc_dfl_recover(id);
+      if (retval)
 	return retval;
-    t = (struct dfl_data *)id->data; /* point to recovered cache */
+      t = (struct dfl_data *)id->data; /* point to recovered cache */
+    }
+
     tmp = (krb5_rcache) malloc(sizeof(*tmp));
     if (!tmp)
 	return ENOMEM;
@@ -574,6 +588,7 @@
 	return KRB5_RC_IO;
     if (krb5_rc_io_move(&t->d, &((struct dfl_data *)tmp->data)->d))
 	return KRB5_RC_IO;
+    krb5_rc_dfl_close(tmp);
 #endif
     return 0;
 }
---cut here---

It looks like we never submitted this patch to krb5-bugs@mit.edu, so
I'm CC'ing this message there.

-- 
Jonathan Kamens  |  OpenVision Technologies, Inc.  |   jik@cam.ov.com

home help back first fref pref prev next nref lref last post