[498] in Zephyr_Bugs
ZHM patches to randomly choose a new server
daemon@ATHENA.MIT.EDU (Derek Atkins)
Sat Jul 31 23:17:29 1993
To: bug-zephyr@Athena.MIT.EDU
Cc: rel-eng@Athena.MIT.EDU, probe@Athena.MIT.EDU, shapere@Athena.MIT.EDU
Date: Sat, 31 Jul 93 23:17:23 EDT
From: Derek Atkins <warlord@Athena.MIT.EDU>
It's a known problem that zhm will hose the other servers when one goes
down, since all the zhm's will choose the same new_server. This patch
fixes that problem by randomly choosing a new server to change to.
I've tested this, and it works properly (I zctl new_server'ed, and it
changed from b11zephyr to arilinn to neskaya, and then back to
arilinn, instead of following the order a la hesiod).
There is also a patch to allow for POSIX signal semantics to be built
automatically, instead of depending on "SOLARIS". That way these
sources will work on linux as well.
Hopefully someone will make something of these patches, and they won't
go into the bitbucket. I should add that these patches are to the
sources in /source/athena/athena.lib/zephyr/zhm.
One other thing, the sources in /mit/zephyr do not match the sources
in /source. They have diverged. This is a bad thing, and should be
fixed, somehow.
These sources can be found in /mit/zephyr/tba/warlord/zhm.
Enjoy!
-derek
*** /tmp/,RCSt1a12413 Sat Jul 31 23:09:20 1993
--- zhm.c Sat Jul 31 23:01:15 1993
***************
*** 46,51 ****
--- 46,52 ----
long starttime;
u_short cli_port;
struct sockaddr_in cli_sin, serv_sin, from;
+ int numserv;
char **serv_list, **cur_serv_list;
char prim_serv[MAXHOSTNAMELEN], cur_serv[MAXHOSTNAMELEN];
char *zcluster;
***************
*** 115,120 ****
--- 116,124 ----
exit(2);
}
+ /* Init random number generator */
+ srandom(time((long *) 0));
+
/* Override server argument? */
if (optind < argc) {
if ((hp = gethostbyname(argv[optind++])) == NULL) {
***************
*** 132,137 ****
--- 136,142 ----
serv_list[i] = strsave(hp->h_name);
i++;
}
+ numserv = i; /* Number of servers */
serv_list[i] = NULL;
}
#ifdef Z_HaveHesiod
***************
*** 196,205 ****
clust_info[j++] = strsave(serv_list[i]);
}
clust_info[j] = NULL;
serv_list = clust_info;
}
if (!prim_serv[0] && j) {
- srandom(time((long *) 0));
(void) strcpy(prim_serv, serv_list[random() % j]);
}
#endif
--- 201,210 ----
clust_info[j++] = strsave(serv_list[i]);
}
clust_info[j] = NULL;
+ numserv = j;
serv_list = clust_info;
}
if (!prim_serv[0] && j) {
(void) strcpy(prim_serv, serv_list[random() % j]);
}
#endif
***************
*** 398,409 ****
send_boot_notice(HM_BOOT);
deactivated = 0;
! #ifdef SOLARIS
! sa.sa_handler = (void (*)()) set_sig_type;
! sigemptyset(&sa.sa_mask);
! sigaction(SIGHUP, &sa,0);
! sigaction(SIGALRM, &sa, 0);
! sigaction(SIGTERM, &sa,0);
#else
(void)signal (SIGHUP, set_sig_type);
(void)signal (SIGALRM, set_sig_type);
--- 403,418 ----
send_boot_notice(HM_BOOT);
deactivated = 0;
! #ifdef SA_INTERRUPT
! /* We want this for POSIX signals, not just solaris (i.e.,
! * linux needs this, too. -warlord
! */
! sa.sa_handler = (void (*)()) set_sig_type;
!
! sigemptyset(&sa.sa_mask);
! sigaction(SIGHUP, &sa,0);
! sigaction(SIGALRM, &sa, 0);
! sigaction(SIGTERM, &sa,0);
#else
(void)signal (SIGHUP, set_sig_type);
(void)signal (SIGALRM, set_sig_type);
*** /tmp/,RCSt1a12418 Sat Jul 31 23:09:46 1993
--- zhm_server.c Sat Jul 31 22:45:37 1993
***************
*** 24,29 ****
--- 24,30 ----
extern struct sockaddr_in serv_sin, from;
extern int timeout_type, hmdebug, nservchang, booting, nserv, no_server;
extern int deactivated, rebootflag;
+ extern int numserv; /* number of servers in the serv_list */
extern char **serv_list, **cur_serv_list;
extern char cur_serv[], prim_serv[];
extern void die_gracefully();
***************
*** 93,98 ****
--- 94,104 ----
}
}
+ /* Find a new server. We might get passed in a server, in which
+ * case we should try that one first (if it's in our list of servers)
+ * If we either can't find it or were not passed in a suggested server,
+ * then randomly pick a new server.
+ */
find_next_server(sugg_serv)
char *sugg_serv;
{
***************
*** 99,160 ****
struct hostent *hp;
int done = 0;
char **parse = serv_list;
if (sugg_serv) {
! do {
! if (!strcmp(*parse, sugg_serv))
! done = 1;
! } while ((done == 0) && (*++parse != NULL));
}
! if (done) {
! if ((hp = gethostbyname(sugg_serv)) != NULL) {
! DPR2 ("Server = %s\n", sugg_serv);
! (void)strcpy(cur_serv, sugg_serv);
! if (hmdebug)
! syslog(LOG_DEBUG, "Suggested server: %s\n", sugg_serv);
! } else {
! done = 0;
! sleep(1);
! }
! }
! if (!done)
! do {
! if ((++serv_loop > 3) && (strcmp(cur_serv, prim_serv))) {
! serv_loop = 0;
! if ((hp = gethostbyname(prim_serv)) != NULL) {
! DPR2 ("Server = %s\n", prim_serv);
! (void)strcpy(cur_serv, prim_serv);
! done = 1;
! } else
! sleep(1);
! } else {
! if (*++cur_serv_list == NULL) {
! /* Exit if we are rebooting, */
! /* and cannot find a server. */
! if (rebootflag)
! die_gracefully();
! cur_serv_list = serv_list;
! if (!cur_serv_list[1]) {
! /* server list has only one entry, use it */
! if ((hp = gethostbyname(*cur_serv_list))
! != NULL) {
! DPR2 ("Server = %s\n", *cur_serv_list);
! (void)strcpy(cur_serv, *cur_serv_list);
! done = 1;
! } else
! sleep(1);
! }
! }
! if (strcmp(*cur_serv_list, cur_serv)) {
! if ((hp = gethostbyname(*cur_serv_list)) != NULL){
! DPR2 ("Server = %s\n", *cur_serv_list);
! (void)strcpy(cur_serv, *cur_serv_list);
! done = 1;
! } else
! sleep(1);
! }
! }
! } while (done == 0);
bcopy(hp->h_addr, (char *)&serv_sin.sin_addr, hp->h_length);
nservchang++;
}
--- 105,157 ----
struct hostent *hp;
int done = 0;
char **parse = serv_list;
+ char *new_serv;
+ /* First, check to see if we can find the suggested server. */
if (sugg_serv) {
! do {
! if (!strcmp(*parse, sugg_serv))
! done = 1;
! } while ((done == 0) && (*++parse != NULL));
!
! /* If we found it, then see if it resolves to a real
! * machine. If it does, great! Use it. If not, then
! * we're not done.
! */
! if (done) {
! if ((hp = gethostbyname(sugg_serv)) != NULL) {
! DPR2 ("Server = %s\n", sugg_serv);
! (void)strcpy(cur_serv, sugg_serv);
! if (hmdebug)
! syslog(LOG_DEBUG, "Suggested server: %s\n", sugg_serv);
! } else {
! done = 0;
! sleep(1);
! }
! }
}
!
! /* If we do not have a server at this point, then let's keep
! * randomly choosing one until we can actually resolve the
! * machine name!
! */
! while (!done) {
! new_serv = serv_list[random() % numserv];
!
! /* Make sure we didn't pick the same server we started with */
! if (!strcmp(new_serv, cur_serv))
! continue;
!
! if ((hp = gethostbyname(new_serv)) != NULL) {
! DPR2 ("Server = %s\n", new_serv);
! (void)strcpy(cur_serv, new_serv);
! if (hmdebug)
! syslog(LOG_DEBUG, "New server: %s\n", new_serv);
! done = 1;
! } else
! sleep(1);
!
! } /* while */
bcopy(hp->h_addr, (char *)&serv_sin.sin_addr, hp->h_length);
nservchang++;
}