XRootD
Loading...
Searching...
No Matches
XrdCmsCluster.cc
Go to the documentation of this file.
1/******************************************************************************/
2/* */
3/* X r d C m s C l u s t e r . c c */
4/* */
5/* (c) 2007 by the Board of Trustees of the Leland Stanford, Jr., University */
6/* All Rights Reserved */
7/* Produced by Andrew Hanushevsky for Stanford University under contract */
8/* DE-AC02-76-SFO0515 with the Department of Energy */
9/* */
10/* This file is part of the XRootD software suite. */
11/* */
12/* XRootD is free software: you can redistribute it and/or modify it under */
13/* the terms of the GNU Lesser General Public License as published by the */
14/* Free Software Foundation, either version 3 of the License, or (at your */
15/* option) any later version. */
16/* */
17/* XRootD is distributed in the hope that it will be useful, but WITHOUT */
18/* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
19/* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
20/* License for more details. */
21/* */
22/* You should have received a copy of the GNU Lesser General Public License */
23/* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
24/* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
25/* */
26/* The copyright holder's institutional names and contributor's names may not */
27/* be used to endorse or promote products derived from this software without */
28/* specific prior written permission of the institution or contributor. */
29/******************************************************************************/
30
31#include <cerrno>
32#include <fcntl.h>
33#include <cstdio>
34#include <cstdlib>
35#include <random>
36#include <unistd.h>
37#include <netinet/in.h>
38#include <sys/types.h>
39
41
42#include "Xrd/XrdJob.hh"
43#include "Xrd/XrdLink.hh"
44#include "Xrd/XrdScheduler.hh"
45
48#include "XrdCms/XrdCmsCache.hh"
52#include "XrdCms/XrdCmsNode.hh"
53#include "XrdCms/XrdCmsRole.hh"
54#include "XrdCms/XrdCmsRRQ.hh"
55#include "XrdCms/XrdCmsState.hh"
57#include "XrdCms/XrdCmsTrace.hh"
58#include "XrdCms/XrdCmsTypes.hh"
59
60#include "XrdOuc/XrdOucPup.hh"
61
64#include "XrdSys/XrdSysTimer.hh"
65
66using namespace XrdCms;
67
68/******************************************************************************/
69/* G l o b a l O b j e c t s */
70/******************************************************************************/
71
73
74/******************************************************************************/
75/* L o c a l S t r u c t u r e s */
76/******************************************************************************/
77
79{
80public:
81
82 void DoIt() {if (nodeP)
83 {nodeP->Delete(Cluster.STMutex);
84 delete this;
85 } else {
86 if (!Cluster.Drop(nodeEnt, nodeInst, this)) delete this;
87 }
88 }
89
90 XrdCmsDrop(XrdCmsNode *nP) : XrdJob("delete node"), nodeP(nP),
91 nodeEnt(0), nodeInst(0)
92 {Sched->Schedule((XrdJob *)this);}
93
94 XrdCmsDrop(int nid, int inst) : XrdJob("drop node"), nodeP(0),
95 nodeEnt(nid), nodeInst(inst)
96 {Sched->Schedule((XrdJob *)this, time(0)+Config.DRPDelay);}
97
99
103};
104
105/******************************************************************************/
106/* C o n s t r u c t o r */
107/******************************************************************************/
108
110{
111 memset((void *)NodeTab, 0, sizeof(NodeTab));
112 memset((void *)AltMans, (int)' ', sizeof(AltMans));
113 AltMend = AltMans;
114 AltMent = -1;
115 NodeCnt = 0;
116 STHi = -1;
117 SelWtot = 0;
118 SelRtot = 0;
119 SelTcnt = 0;
120 peerHost = 0;
121 peerMask = ~peerHost;
122}
123
124/******************************************************************************/
125/* A d d */
126/******************************************************************************/
127
128XrdCmsNode *XrdCmsCluster::Add(XrdLink *lp, int port, int Status, int sport,
129 const char *theNID, const char *theIF)
130
131{
132 EPNAME("Add")
133 const char *act = "";
134 XrdCmsNode *nP = 0;
135 XrdCmsClustID *cidP = 0;
136 XrdSysRWLockHelper STMHelper(STMutex, false); // Need write lock!
137 int tmp, Slot, Free = -1, Bump1 = -1, Bump2 = -1, Bump3 = -1, aSet = 0;
138 bool Special = (Status & (CMS_isMan|CMS_isPeer));
139 bool SpecAlt = (Special && !(Status & CMS_isSuper));
140 bool Hidden = false;
141
142// Find available slot for this node. Here are the priorities:
143// Slot = Reconnecting node
144// Free = Available slot ( 1st in table)
145// Bump1 = Disconnected server (last in table)
146// Bump2 = Connected server (last in table) if new one is managr/peer
147// Bump3 = Disconnected managr/peer ( 1st in table) if new one is managr/peer
148//
149 for (Slot = 0; Slot < STMax; Slot++)
150 if (NodeTab[Slot])
151 {if (NodeTab[Slot]->isNode(lp, theNID, port)) break;
152/*Conn*/ if (NodeTab[Slot]->isConn)
153 {if (!NodeTab[Slot]->isPerm && Special)
154 Bump2 = Slot; // Last conn Server
155/*Disc*/ } else {
156 if ( NodeTab[Slot]->isPerm)
157 {if (Bump3 < 0 && Special) Bump3 = Slot;}// 1st disc Man/Pr
158 else Bump1 = Slot; // Last disc Server
159 }
160 } else if (Free < 0) Free = Slot; // 1st free slot
161
162// Check if node is already logged in or is a relogin
163//
164 if (Slot < STMax)
165 {if (NodeTab[Slot] && NodeTab[Slot]->isBound)
166 {Say.Emsg("Cluster", lp->ID, "already logged in.");
167 return 0;
168 } else { // Rehook node to previous unconnected entry
169 nP = NodeTab[Slot];
170 nP->Link = lp;
171 nP->isOffline = 0;
172 nP->isBad &= ~XrdCmsNode::isSuspend;
173 nP->isConn = 1;
174 nP->Instance++;
175 nP->setName(lp, theIF, port); // Just in case it changed
176 act = "Reconnect ";
177 }
178 }
179
180// First see if this node may be an alternate
181//
182 if (!nP && SpecAlt)
183 {if ((cidP = XrdCmsClustID::Find(theNID)) && !(cidP->IsEmpty()))
184 {if (!(nP = AddAlt(cidP, lp, port, Status, sport, theNID, theIF)))
185 return 0;
186 aSet = 1; Slot = nP->NodeID;
187 if (nP != NodeTab[Slot]) {Hidden = true; act = "Alternate ";}
188 }
189 }
190
191// Reuse an old ID if we must or redirect the incoming node
192//
193 if (!nP)
194 {if (Free >= 0) Slot = Free;
195 else {if (Bump1 >= 0) Slot = Bump1;
196 else Slot = (Bump2 >= 0 ? Bump2 : Bump3);
197 if (Slot < 0)
198 {if (Status & CMS_isPeer) Say.Emsg("Cluster", "Add peer", lp->ID,
199 "failed; too many subscribers.");
200 else {sendAList(lp);
201 DEBUG(lp->ID <<" redirected; too many subscribers.");
202 }
203 return 0;
204 }
205
206 if (Status & CMS_isMan) {setAltMan(Slot, lp, sport); aSet=1;}
207 if (NodeTab[Slot] && !(Status & CMS_isPeer))
208 sendAList(NodeTab[Slot]->Link);
209
210 DEBUG(lp->ID << " bumps " << NodeTab[Slot]->Ident <<" #" <<Slot);
211 NodeTab[Slot]->Lock();
212 Remove("redirected", NodeTab[Slot], -1);
213 act = "Shoved ";
214 }
215 NodeTab[Slot] = nP = new XrdCmsNode(lp, theIF, theNID, port, 0, Slot);
216 if (!cidP) cidP = XrdCmsClustID::AddID(theNID);
217 if ((cidP->AddNode(nP, SpecAlt))) nP->cidP = cidP;
218 else {delete nP; NodeTab[Slot] = 0; return 0;} // OK to do delete!
219 }
220
221// Indicate whether this snode can be redirected
222//
223 nP->isPerm = (Status & (CMS_isMan | CMS_isPeer)) ? 1 : 0;
224
225// Assign new server
226//
227 if (!aSet && (Status & CMS_isSuper)) setAltMan(Slot, lp, sport);
228 if (Slot > STHi) STHi = Slot;
229 nP->isBound = 1;
230 nP->isConn = 1;
231 nP->isNoStage = 0 != (Status & CMS_noStage);
232 nP->isBad |= (Status & CMS_Suspend ? XrdCmsNode::isSuspend : 0);
233 nP->isMan = 0 != (Status & CMS_isMan);
234 nP->isPeer = 0 != (Status & CMS_isPeer);
236 nP->subsPort = sport;
237
238// If this is an actual non-hidden node, count it
239//
240 if (!Hidden)
241 {NodeCnt++;
242 if (Config.SUPLevel
243 && (tmp = NodeCnt*Config.SUPLevel/100) > Config.SUPCount)
244 {Config.SUPCount=tmp; CmsState.Set(tmp);}
245 } else nP->isMan |= 0x02;
246
247// Compute new peer mask, as needed
248//
249 if (nP->isPeer) peerHost |= nP->NodeMask;
250 else peerHost &= ~nP->NodeMask;
251 peerMask = ~peerHost;
252
253// Document login
254//
255 if (QTRACE(Debug))
256 {DEBUG(act <<nP->Ident <<" to cluster " <<nP->myNID <<" slot "
257 <<Slot <<'.' <<nP->Instance <<" (nodecnt=" <<NodeCnt
258 <<" supn=" <<Config.SUPCount <<")");
259 }
260
261// Compute new state of all nodes if we are a reporting manager.
262//
263 if (Config.asManager() && !Hidden)
265 nP->isBad & XrdCmsNode::isSuspend ? 0 : 1,
266 nP->isNoStage ? 0 : 1);
267
268// All done. Return the node locked.
269//
270 nP->Lock();
271 return nP;
272}
273
274/******************************************************************************/
275/* Private: A d d A l t */
276/******************************************************************************/
277
278// Warning STMutex must be held in write mode by the caller!
279
280XrdCmsNode *XrdCmsCluster::AddAlt(XrdCmsClustID *cidP, XrdLink *lp,
281 int port, int Status, int sport,
282 const char *theNID, const char *theIF)
283
284{
285 EPNAME("AddAlt")
286 XrdCmsNode *pP, *nP = 0;
287 int slot = cidP->Slot();
288
289// Check if this node is already in the alternate table
290//
291 if (cidP->Exists(lp, theNID, port))
292 {Say.Emsg(epname, lp->ID, "already logged in.");
293 return 0;
294 }
295
296// Add this node if there is room
297//
298 if (cidP->Avail())
299 {nP = new XrdCmsNode(lp, theIF, theNID, port, 0, slot);
300 if (!(cidP->AddNode(nP, true))) {delete nP; nP = 0;} // OK to do delete!
301 }
302
303// Check if we were actually able to add this node
304//
305 if (!nP)
306 {Say.Emsg(epname, "Add alternate manager", lp->ID,
307 "failed; too many subscribers.");
308 return 0;
309 }
310
311// Check if the existing lead dead and we can substiture this one
312//
313 if ((pP = NodeTab[slot]) && !(pP->isBound))
314 {setAltMan(nP->NodeID, nP->Link, sport);
315 Say.Emsg("AddAlt", nP->Ident, "replacing dropped", pP->Ident);
316 NodeTab[slot] = nP;
317 pP->DropJob = new XrdCmsDrop(pP); // Schedule deletion
318 }
319
320// Hook the node to the cluster table and return
321//
322 nP->cidP = cidP;
323 return nP;
324}
325
326/******************************************************************************/
327/* B l a c k L i s t */
328/******************************************************************************/
329
331{
332 static CmsDiscRequest discRequest = {{0, kYR_disc, 0, 0}};
333 XrdCmsNode *nP;
334 const char *etxt = "blacklisted.";
335 int i, blRD = 0;
336 bool inBL;
337
338// Obtain a lock on the table. We need this in write mode!
339//
340 STMutex.WriteLock();
341
342// Run through the table looking to put or out of the blacklist
343//
344 for (i = 0; i <= STHi; i++)
345 {if ((nP = NodeTab[i]))
346 {inBL = (blP && (blRD = XrdCmsBlackList::Present(nP->Name(), blP)));
347 if ((!inBL && !(nP->isBad & XrdCmsNode::isBlisted))
348 || ( inBL && (nP->isBad & XrdCmsNode::isBlisted))) continue;
349 nP->g2nLock(STMutex); // Downgrade to only node lock
350 if (inBL)
352 if (blRD < -1)
353 {if (kYR_Version > nP->myVersion)
354 etxt = "blacklisted; redirect unsupported.";
355 else etxt = "blacklisted with redirect.";
357 nP->Send((char *)&discRequest, sizeof(discRequest));
358 }
359 Say.Emsg("Manager", nP->Name(), etxt);
360 } else {
362 Say.Emsg("Manager", nP->Name(), "removed from blacklist.");
363 }
364 nP->n2gLock(STMutex);
365 }
366 }
367 STMutex.UnLock();
368}
369
370/******************************************************************************/
371/* B r o a d c a s t */
372/******************************************************************************/
373
374SMask_t XrdCmsCluster::Broadcast(SMask_t smask, const struct iovec *iod,
375 int iovcnt, int iotot)
376{
377 EPNAME("Broadcast")
378 int i;
379 XrdCmsNode *nP;
380 SMask_t bmask, unQueried(0);
381
382// Obtain a lock on the table and screen out peer nodes
383//
384 STMutex.ReadLock(); // Sufficient to prevent modifications
385 bmask = smask & peerMask;
386
387// Run through the table looking for nodes to send messages to. We don't need
388// the node lock for this but we do need to up the reference count to keep the
389// node pointer valid for the duration of the send() (may or may not block).
390//
391 for (i = 0; i <= STHi; i++)
392 {if ((nP = NodeTab[i]) && nP->isNode(bmask))
393 {if (nP->isOffline) unQueried |= nP->Mask();
394 else {nP->Ref();
395 STMutex.UnLock();
396 if (nP->Send(iod, iovcnt, iotot) < 0)
397 {unQueried |= nP->Mask();
398 DEBUG(nP->Ident <<" is unreachable");
399 }
400 nP->unRef();
401 STMutex.ReadLock();
402 }
403 }
404 }
405 STMutex.UnLock();
406 return unQueried;
407}
408
409/******************************************************************************/
410
412 char *Data, int Dlen)
413{
414 struct iovec ioV[3], *iovP = &ioV[1];
415 unsigned short Temp;
416 int Blen;
417
418// Construct packed data for the character argument. If data is a string then
419// Dlen must include the null byte if it is specified at all.
420//
421 Blen = XrdOucPup::Pack(&iovP, Data, Temp, (Dlen ? strlen(Data)+1 : Dlen));
422 Hdr.datalen = htons(static_cast<unsigned short>(Blen));
423
424// Complete the iovec and send off the data
425//
426 ioV[0].iov_base = (char *)&Hdr; ioV[0].iov_len = sizeof(Hdr);
427 return Broadcast(smask, ioV, 3, Blen+sizeof(Hdr));
428}
429
430/******************************************************************************/
431
433 void *Data, int Dlen)
434{
435 struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
436 {(char *)Data, (size_t)Dlen}};
437
438// Send of the data as eveything was constructed properly
439//
440 Hdr.datalen = htons(static_cast<unsigned short>(Dlen));
441 return Broadcast(smask, ioV, 2, Dlen+sizeof(Hdr));
442}
443
444/******************************************************************************/
445/* B r o a d s e n d */
446/******************************************************************************/
447
448// Send message to first eligible node!
449
451 void *Data, int Dlen)
452{
453 EPNAME("Broadsend");
454 static int Start = 0;
455 XrdCmsNode *nP;
456 struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
457 {(char *)Data, (size_t)Dlen}};
458 int i, Beg, Fin, ioTot = Dlen+sizeof(Hdr);
459
460// Send of the data as eveything was constructed properly
461//
462 Hdr.datalen = htons(static_cast<unsigned short>(Dlen));
463
464// Obtain a lock on the table and get the starting and ending position. Note
465// that the mechnism we use will necessarily skip newly added nodes.
466//
467 STMutex.ReadLock(); // Sufficient to prevent modifications
468 Beg = Start = (Start <= STHi ? Start+1 : 0);
469 Fin = STHi;
470
471// Run through the table looking for a node to send a message to. We don't need
472// the node lock for this but we do need to up the reference count to keep the
473// node pointer valid for the duration of the send() (may or may not block).
474//
475do{for (i = Beg; i <= Fin; i++)
476 {if ((nP = NodeTab[i]) && nP->isNode(Who))
477 {if (nP->isOffline) continue;
478 nP->Ref();
479 STMutex.UnLock();
480 if (nP->Send(ioV, 2, ioTot) >= 0) {nP->unRef(); return 1;}
481 DEBUG(nP->Ident <<" is unreachable");
482 nP->unRef();
483 STMutex.ReadLock();
484 }
485 }
486 if (!Beg) break;
487 Fin = Beg-1; Beg = 0;
488 } while(1);
489
490// Did not send to anyone
491//
492 STMutex.UnLock();
493 return 0;
494}
495
496/******************************************************************************/
497/* g e t M a s k */
498/******************************************************************************/
499
501{
502 int i;
503 XrdCmsNode *nP;
504 SMask_t smask(0);
505
506// Obtain a lock on the table
507//
508 STMutex.ReadLock();
509
510// Run through the table looking for a node with matching IP address
511//
512 for (i = 0; i <= STHi; i++)
513 if ((nP = NodeTab[i]) && nP->isNode(addr))
514 {smask = nP->NodeMask; break;}
515
516// All done
517//
518 STMutex.UnLock();
519 return smask;
520}
521
522/******************************************************************************/
523
525{
526 return XrdCmsClustID::Mask(Cid);
527}
528
529/******************************************************************************/
530/* L i s t */
531/******************************************************************************/
532
534{
535 static const int iSize = XrdCmsSelected::IdentSize;
536 XrdCmsNode *nP;
537 XrdCmsSelected *sipp = 0, *sip;
539 XrdNetIF::ifType ifGet = ifType;
540 int i, destLen;
541 bool retName = (opts & LS_IDNT) != 0;
542 bool retAny = (opts & LS_ANY ) != 0;
543 bool retDest = retName || (opts & LS_IPO);
544
545// If only one wanted, the select appropriately
546//
547 oksel = false;
548 STMutex.ReadLock();
549 for (i = 0; i <= STHi; i++)
550 if ((nP=NodeTab[i]) && (nP->NodeMask & mask))
551 {oksel = true;
552 if (retDest)
553 { if (nP->netIF.HasDest(ifType)) ifGet = ifType;
554 else if (!retAny) continue;
555 else {ifGet = (XrdNetIF::ifType)(ifType ^ XrdNetIF::PrivateIF);
556 if (!nP->netIF.HasDest(ifGet)) continue;
557 }
558 }
559 sip = new XrdCmsSelected(sipp);
560 if (retDest) destLen = nP->netIF.GetDest(sip->Ident, iSize,
561 ifGet, retName);
562 else if (nP->myNlen >= XrdCmsSelected::IdentSize) destLen = 0;
563 else {strcpy(sip->Ident, nP->myName); destLen = nP->myNlen;}
564 if (!destLen) {delete sip; continue;}
565
566 sip->IdentLen = destLen;
567 sip->Mask = nP->NodeMask;
568 sip->Id = nP->NodeID;
569 sip->Port = nP->netIF.Port();
570 sip->RefTotW = nP->RefTotW;
571 sip->RefTotR = nP->RefTotR;
572 sip->Shrin = nP->Shrin;
573 sip->Share = nP->Share;
574 sip->RoleID = nP->RoleID;
575 sip->Status = (nP->isOffline ? XrdCmsSelected::Offline : 0);
577 sip->Status |= XrdCmsSelected::Disable;
578 if (nP->isNoStage) sip->Status |= XrdCmsSelected::NoStage;
580 sip->Status |= XrdCmsSelected::Suspend;
581 if (nP->isRW ) sip->Status |= XrdCmsSelected::isRW;
582 if (nP->isMan ) sip->Status |= XrdCmsSelected::isMangr;
583 sipp = sip;
584 }
585 STMutex.UnLock();
586
587// Return result
588//
589 return sipp;
590}
591
592/******************************************************************************/
593/* L o c a t e */
594/******************************************************************************/
595
597{
598 EPNAME("Locate");
599 XrdCmsPInfo pinfo;
600 SMask_t qfVec(0);
601 char *Path;
602 int retc = 0;
603
604// Check if this is a locate for all current servers
605//
606 if (*Sel.Path.Val != '*') Path = Sel.Path.Val;
607 else {if (*(Sel.Path.Val+1) == '\0')
608 {Sel.Vec.hf = ~0LL; Sel.Vec.pf = Sel.Vec.wf = 0;
609 return 0;
610 }
611 Path = Sel.Path.Val+1;
612 }
613
614// Find out who serves this path
615//
616 if (!Cache.Paths.Find(Path, pinfo) || !pinfo.rovec)
617 {Sel.Vec.hf = Sel.Vec.pf = Sel.Vec.wf = 0;
618 return NotFound;
619 } else Sel.Vec.wf = pinfo.rwvec;
620
621// Check if this was a non-lookup request
622//
623 if (*Sel.Path.Val == '*')
624 {Sel.Vec.hf = pinfo.rovec; Sel.Vec.pf = 0;
625 Sel.Vec.wf = pinfo.rwvec;
626 return 0;
627 }
628
629// Complete the request info object if we have one
630//
631 if (Sel.InfoP)
632 {Sel.InfoP->rwVec = pinfo.rwvec;
633 Sel.InfoP->isLU = 1;
634 }
635
636// If we are running a shared file system preform an optional restricted
637// pre-selection and then do a standard selection.
638//
639 if (baseFS.isDFS())
640 {SMask_t amask, smask, pmask;
641 amask = pmask = pinfo.rovec;
642 smask = (Sel.Opts & XrdCmsSelect::Online ? 0 : pinfo.ssvec & amask);
643 Sel.Resp.DLen = 0;
644 if (!(retc = SelDFS(Sel, amask, pmask, smask, 1)))
645 return (Sel.Opts & XrdCmsSelect::Asap && Sel.InfoP
646 ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
647 if (retc < 0) return NotFound;
648 return 0;
649 }
650
651// First check if we have seen this file before. If so, get nodes that have it.
652// A Refresh request kills this because it's as if we hadn't seen it before.
653// If the file was found but either a query is in progress or we have a server
654// bounce; the client must wait.
655//
657 || !(retc = Cache.GetFile(Sel, pinfo.rovec)))
658 {Cache.AddFile(Sel, 0);
659 qfVec = pinfo.rovec; Sel.Vec.hf = 0;
660 } else qfVec = Sel.Vec.bf;
661
662// Compute the delay, if any
663//
664 if ((!qfVec && retc >= 0) || (Sel.Vec.hf && Sel.InfoP)) retc = 0;
665 else if (!(retc = Cache.WT4File(Sel, Sel.Vec.hf))) retc = Wait4CBk;
666
667// Check if we have to ask any nodes if they have the file
668//
669 if (qfVec)
670 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
671 if (Sel.Opts & XrdCmsSelect::Refresh)
673 TRACE(Files, "seeking " <<Sel.Path.Val);
674 qfVec = Cluster.Broadcast(qfVec, QReq.Hdr,
675 (void *)Sel.Path.Val, Sel.Path.Len+1);
676 if (qfVec) Cache.UnkFile(Sel, qfVec);
677 }
678 return retc;
679}
680
681/******************************************************************************/
682/* M o n P e r f */
683/******************************************************************************/
684
686{
687 CmsUsageRequest Usage = {{0, kYR_usage, 0, 0}};
688 struct iovec ioV[] = {{(char *)&Usage, sizeof(Usage)}};
689 int ioVnum = sizeof(ioV)/sizeof(struct iovec);
690 int ioVtot = sizeof(Usage);
691 SMask_t allNodes(~0);
692 int uInterval = Config.AskPing*Config.AskPerf;
693
694// Sleep for the indicated amount of time, then ask for load on each server
695//
696 while(uInterval)
697 {XrdSysTimer::Snooze(uInterval);
698 Broadcast(allNodes, ioV, ioVnum, ioVtot);
699 }
700 return (void *)0;
701}
702
703/******************************************************************************/
704/* M o n R e f s */
705/******************************************************************************/
706
708{
709 XrdCmsNode *nP;
710 int snooze_interval = 60, snooze_total = 0;
711 int rCnt = 0, wCnt = 0;
712 bool resetW, resetR, resetRW;
713
714// Sleep for the snooze interval. If a reset was requested then do a selective
715// reset unless we reached our snooze maximum and enough selections have gone
716// by; in which case, do a global reset.
717//
718 do {XrdSysTimer::Snooze(snooze_interval);
719 int totR = 0, totW = 0;
720
721 STMutex.ReadLock();
722 for (int i = 0; i <= STHi; i++)
723 {if ((nP = NodeTab[i]))
724 {totR += nP->RefTotR;
725 totW += nP->RefTotW;
726 }
727 }
728 STMutex.UnLock();
729
730 rCnt += (totR - SelRtot); SelRtot = totR;
731 wCnt += (totW - SelWtot); SelWtot = totW;
732 snooze_total += snooze_interval;
733
734 resetR = (rCnt >= Config.RefTurn);
735 resetW = (wCnt >= Config.RefTurn);
736 resetRW = (snooze_total >= Config.RefReset && (resetW || resetR));
737 if (resetRW)
738 {ResetRef((SMask_t)0);
739 if (resetR) rCnt = 0;
740 if (resetW) wCnt = 0;
741 snooze_total = 0;
742 }
743 } while(1);
744
745 return (void *)0;
746}
747
748/******************************************************************************/
749/* R e m o v e */
750/******************************************************************************/
751
752// Warning! The node object must be locked upon entry. The lock is released
753// upon deletion of the object.
754
756{
757 theNode->DropJob = new XrdCmsDrop(theNode);
758}
759
760// Warning! The node object must be locked upon entry. The lock is released
761// prior to returning to the caller. This entry obtains the node
762// table lock. When immed != 0 then the node is immediately dropped.
763// When immed if < 0 then the caller already holds the STMutex in
764// write mode and it is not released upon exit.
765
766void XrdCmsCluster::Remove(const char *reason, XrdCmsNode *theNode, int immed)
767{
768 EPNAME("Remove_Node")
769 struct theLocks
770 {XrdSysRWLock *myMutex;
771 XrdCmsNode *myNode;
772 int myNID;
773 int myInst;
774 bool hasLK;
775 bool doDrop;
776 char myIdent[510];
777
778 theLocks(XrdSysRWLock *mtx, XrdCmsNode *node, int immed)
779 : myMutex(mtx), myNode(node), hasLK(immed < 0),
780 doDrop(false)
781 {strlcpy(myIdent, node->Ident, sizeof(myIdent));
782 myNID = node->ID(myInst);
783 if (!hasLK)
784 {myNode->Ref(); // Keep alive
785 myNode->UnLock();
786 myMutex->WriteLock(); // Get global lock
787 myNode->Lock();
788 myNode->unRef(); // Can't escape now
789 }
790 }
791 ~theLocks()
792 {if (myNode)
793 {if (doDrop)
794 {myNode->isBound = 0;
795 myNode->DropTime = 0;
796 myNode->DropJob = new XrdCmsDrop(myNode);
797 myNode->UnLock();
798 } else myNode->UnLock();
799 }
800 if (!hasLK) myMutex->UnLock();
801 }
802 } LockHandler(&STMutex, theNode, immed);
803
804 XrdCmsNode *altNode = 0;
805 int Inst, NodeID = theNode->ID(Inst);
806
807// The LockHandler makes sure that the proper locks are obtained in a deadlock
808// free order. However, this may require that the node lock be released and
809// then re-aquired. We check if we are still dealing with same node at entry.
810// If not, issue message and high-tail it out.
811//
812 if (LockHandler.myNID != NodeID || LockHandler.myInst != Inst)
813 {Say.Emsg("Manager", LockHandler.myIdent, "removal aborted.");
814 DEBUG(LockHandler.myIdent <<" node " <<NodeID <<'.' <<Inst <<" != "
815 << LockHandler.myNID <<'.' <<LockHandler.myInst <<" at entry.");
816 }
817
818// Mark node as being offline and remove any drop job from it
819//
820 theNode->isOffline = 1; // Global lock is held here
821
822// If the node is connected we simply close the connection. This will cause
823// the connection handler to re-initiate the node removal. This condition
824// exists only if one node is being displaced by another node. The Disc()
825// may take a long time, but it's done async by default on the WAN and sync
826// on the LAN (local connections are fast enough and error-free for this).
827//
828 if (theNode->isConn)
829 {theNode->Disc(reason, 0);
830 theNode->isGone = 1; // Disc() sets the isOffline flag
831 return;
832 }
833
834// If we are not the primary node, then get rid of this node post-haste
835//
836 if (!(NodeTab[NodeID] == theNode))
837 {const char *why = (theNode->isMan ? "dropped as alternate."
838 : "dropped and redirected.");
839 Say.Emsg("Remove_Node", theNode->Ident, why);
840 LockHandler.doDrop = true;
841 return;
842 }
843
844
845// If the node is part of the cluster, do not count it anymore and
846// indicate new state of this nodes if we are a reporting manager
847//
848 if (theNode->isBound)
849 {theNode->isBound = 0;
850 NodeCnt--;
851 if (Config.asManager())
853 theNode->isBad & XrdCmsNode::isSuspend ? 0 : -1,
854 theNode->isNoStage ? 0 : -1);
855 }
856
857// If we have a working alternate, substitute it here and immediately drop
858// the former primary. This allows the cache to remain warm.
859//
860 if (theNode->isMan && theNode->cidP && !(theNode->cidP->IsSingle())
861 && (altNode = theNode->cidP->RemNode(theNode)))
862 {if (altNode->isBound) NodeCnt++;
863 NodeTab[NodeID] = altNode;
864 if (Config.asManager())
866 altNode->isBad & XrdCmsNode::isSuspend ? 0 : 1,
867 altNode->isNoStage ? 0 : 1);
868 setAltMan(altNode->NodeID, altNode->Link, altNode->subsPort);
869 Say.Emsg("Manager",altNode->Ident,"replacing dropped",theNode->Ident);
870 LockHandler.doDrop = true;
871 return;
872 }
873
874// If this is an immediate drop request, do so now. Drop() will delete
875// the node object, so remove the node lock and tell LockHandler that.
876//
877 if (immed || !Config.DRPDelay || theNode->isBad & XrdCmsNode::isDoomed)
878 {theNode->UnLock();
879 LockHandler.myNode = 0;
880 Drop(NodeID, Inst);
881 return;
882 }
883
884// If a drop job is already scheduled, update the instance field. Otherwise,
885// Schedule a node drop at a future time.
886//
887 theNode->DropTime = time(0)+Config.DRPDelay;
888 if (theNode->DropJob) theNode->DropJob->nodeInst = Inst;
889 else theNode->DropJob = new XrdCmsDrop(NodeID, Inst);
890
891// Document removal
892//
893 if (reason)
894 Say.Emsg("Manager", theNode->Ident, "scheduled for removal;", reason);
895 else DEBUG(theNode->Ident <<" node " <<NodeID <<'.' <<Inst);
896}
897
898/******************************************************************************/
899/* R e s e t R e f */
900/******************************************************************************/
901
902void XrdCmsCluster::ResetRef(SMask_t nMask, bool isLocked)
903{
904 XrdCmsNode *nP;
905 bool doAll (nMask == 0);
906
907// Obtain a lock on the table if not already locked
908//
909 if (!isLocked) STMutex.ReadLock();
910
911// Reset reference counts as needed. We can do this with a read lock as the
912// reference counters are atomic.
913//
914 for (int i = 0; i <= STHi; i++)
915 {if ((nP = NodeTab[i]) && (doAll || nP->isNode(nMask)))
916 {nP->RefW = 0;
917 nP->RefR = 0;
918 nP->Shrem = nP->Share;
919 }
920 }
921
922// Unlock table and exit
923//
924 if (!isLocked) STMutex.UnLock();
925}
926
927/******************************************************************************/
928/* S e l e c t */
929/******************************************************************************/
930
932{
933 EPNAME("Select");
934 XrdCmsPInfo pinfo;
935 const char *Amode;
936 int dowt = 0, retc = 0, isRW, fRD, noSel = (Sel.Opts & XrdCmsSelect::Defer);
937 SMask_t amask, smask, pmask;
938
939// Establish some local options
940//
941 if (Sel.Opts & XrdCmsSelect::Write)
942 {isRW = 1; Amode = "write";
943 if (Config.RWDelay)
944 if (Sel.Opts & XrdCmsSelect::Create && Config.RWDelay < 2) fRD = 1;
945 else fRD = 0;
946 else fRD = 1;
947 }
948 else {isRW = 0; Amode = "read"; fRD = 1;}
949
950// Find out who serves this path
951//
952 if (!Cache.Paths.Find(Sel.Path.Val, pinfo)
953 || (amask = ((isRW ? pinfo.rwvec : pinfo.rovec) & ~Sel.nmask)) == 0)
954 {const char* addon = (Sel.nmask ? "remaining " : "");
955 Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
956 "No %sservers %s %s access to the file", addon,
957 (isRW && Config.forceRO ? "allowed" : "have"), Amode)+1;
958 Sel.Resp.Port = kYR_ENOENT;
959 return EReplete;
960 }
961
962// If we are running a shared file system preform an optional restricted
963// pre-selection and then do a standard selection. Since all nodes are equal,
964// make sure the client is needlessly avoiding them as this signals an error.
965//
966 if (baseFS.isDFS())
967 {if (Sel.nmask && !(Sel.Opts & XrdCmsSelect::NoTryLim))
968 {pmask = (isRW ? pinfo.rwvec : pinfo.rovec) & Sel.nmask;
969 if (!(Sel.Opts & XrdCmsSelect::Online))
970 pmask |= pinfo.ssvec & Sel.nmask;
971 if (pmask && maxBits(pmask, baseFS.dfsTries()))
972 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
973 "Too many DFS %s attempts; operation terminated", Amode)+1;
974 return RetryErr;
975 }
976 }
977 pmask = amask;
978 smask = (Sel.Opts & XrdCmsSelect::Online ? 0 : pinfo.ssvec & amask);
979 if (baseFS.Trim())
980 {Sel.Resp.DLen = 0;
981 if (!(retc = SelDFS(Sel, amask, pmask, smask, isRW)))
982 return (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
983 if (retc < 0) return retc;
984 } else if (noSel) return 0;
985 return SelNode(Sel, pmask, smask);
986 }
987
988// If either a refresh is wanted or we didn't find the file, re-prime the cache
989// which will force the client to wait. Otherwise, compute the primary and
990// secondary selections. If there are none, the client may have to wait if we
991// have servers that we can query regarding the file. Note that for files being
992// opened in write mode, only one writable copy may exist unless this is a
993// meta-operation (e.g., remove) in which case the file itself remain unmodified
994// or a replica request, in which case we select a new target server.
995//
996 if (!(Sel.Opts & XrdCmsSelect::Refresh)
997 && (retc = Cache.GetFile(Sel, pinfo.rovec)))
998 {if (isRW)
999 { if (retc<0) return Config.LUPDelay;
1000 else if (Sel.Opts & XrdCmsSelect::Replica)
1001 {pmask = amask & ~(Sel.Vec.hf | Sel.Vec.bf); smask = 0;
1002 if (!pmask && !Sel.Vec.bf) return SelFail(Sel,eNoRep);
1003 }
1004 else if (Sel.Vec.bf) pmask = smask = 0;
1005 else if (Sel.Vec.hf)
1006 {if (Sel.Opts & XrdCmsSelect::NewFile) return SelFail(Sel,eExists);
1007 if (!(Sel.Opts & XrdCmsSelect::MWFiles))
1008 {if (!(Sel.Opts & XrdCmsSelect::isMeta)
1009 && maxBits(Sel.Vec.hf,2)) return SelFail(Sel,eDups);
1010 if ((Sel.Vec.hf & pinfo.rwvec)
1011 != (Sel.Vec.hf & pinfo.rovec)) return SelFail(Sel,eROfs);
1012 }
1013 if (!(pmask = Sel.Vec.hf & amask)) return SelFail(Sel,eNoSel);
1014 smask = 0;
1015 }
1017 {pmask = amask; smask = 0;}
1018 else if ((smask = pinfo.ssvec & amask)) pmask = 0;
1019 else pmask = smask = 0;
1020 } else {
1021 pmask = Sel.Vec.hf & amask;
1022 if (Sel.Opts & XrdCmsSelect::Online) {pmask &= ~Sel.Vec.pf; smask=0;}
1023 else smask = (retc < 0 ? 0 : pinfo.ssvec & amask);
1024 }
1025 if (Sel.Vec.hf & Sel.nmask) Cache.UnkFile(Sel, Sel.nmask);
1026 } else {
1027 Cache.AddFile(Sel, 0);
1028 Sel.Vec.bf = pinfo.rovec;
1029 Sel.Vec.hf = Sel.Vec.pf = pmask = smask = 0;
1030 retc = 0;
1031 }
1032
1033// A wait is required if we don't have any primary or seconday servers
1034//
1035 dowt = (!pmask && !smask);
1036
1037// If we can query additional servers, do so now. The client will be placed
1038// in the callback queue only if we have no possible selections
1039//
1040 if (Sel.Vec.bf)
1041 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
1042 if (Sel.Opts & XrdCmsSelect::Refresh)
1044 if (dowt) retc= (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
1045 TRACE(Files, "seeking " <<Sel.Path.Val);
1046 amask = Cluster.Broadcast(Sel.Vec.bf, QReq.Hdr,
1047 (void *)Sel.Path.Val,Sel.Path.Len+1);
1048 if (amask) Cache.UnkFile(Sel, amask);
1049 if (dowt) return retc;
1050 } else if (dowt && retc < 0 && !noSel)
1051 return (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
1052
1053// Broadcast a freshen up request if wanted
1054//
1055 if ((Sel.Opts & XrdCmsSelect::Freshen) && (amask = pmask & ~Sel.Vec.bf))
1057 Cluster.Broadcast(amask, Qupt.Hdr,(void *)Sel.Path.Val,Sel.Path.Len+1);
1058 }
1059
1060// If we need to defer selection, simply return as this is a mindless prepare
1061//
1062 if (noSel) return 0;
1063
1064// Check if we have no useable servers
1065//
1066 if (dowt) return Unuseable(Sel);
1067
1068// Check if should eliminate staging servers. We may need to do this if the
1069// client has been eliminating too many of them as they all should be equal.
1070//
1071 if (Sel.nmask && pinfo.ssvec && !(Sel.Opts & XrdCmsSelect::NoTryLim)
1072 && maxBits(Sel.nmask & pinfo.ssvec, baseFS.stgTries()))
1073 {if (!pmask)
1074 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
1075 "Too many attempts to stage %s access to the file", Amode)+1;
1076 return RetryErr;
1077 }
1078 smask = 0;
1079 }
1080
1081// Select a node
1082//
1083 return SelNode(Sel, pmask, smask);
1084}
1085
1086/******************************************************************************/
1087
1088int XrdCmsCluster::Select(SMask_t pmask, int &port, char *hbuff, int &hlen,
1089 int isrw, int isMulti, int ifWant)
1090{
1091 static const SMask_t smLow(255);
1092 XrdCmsSelector selR;
1093 XrdCmsNode *nP = 0;
1094 SMask_t tmask;
1095 int Snum = 0;
1096 XrdNetIF::ifType nType = static_cast<XrdNetIF::ifType>(ifWant);
1097
1098// If there is nothing to select from, return failure
1099//
1100 if (!pmask) return 0;
1101
1102// Obtain the network we need for the client
1103//
1104 selR.needNet = XrdNetIF::Mask(nType);
1105
1106// Initialize
1107//
1108 selR.needSpace = 0;
1109
1110// Packed selection can never occur in this code path so we turn it off
1111//
1112 selR.selPack = 0;
1113
1114// If we are exporting a shared-everything system then the incoming mask
1115// may have more than one server indicated. So, we need to do a full select.
1116// This is forced when isMulti is true, indicating a choice may exist. Note
1117// that the node, if any, is returned unlocked but we have the global mutex.
1118//
1119 if (isMulti || baseFS.isDFS())
1120 {STMutex.ReadLock();
1121 nP = (Config.sched_RR ? SelbyRef(pmask,selR)
1122 : Config.sched_LoadR == 0 ? SelbyLoad(pmask,selR)
1123 : SelbyLoadR(pmask, selR));
1124
1125 if (nP) hlen = nP->netIF.GetName(hbuff, port, nType) + 1;
1126 else hlen = 0;
1127 STMutex.UnLock();
1128 return hlen != 1;
1129 }
1130
1131// In shared-nothing systems the incoming mask will only have a single node.
1132// Compute the a single node number that is contained in the mask.
1133//
1134 do {if (!(tmask = pmask & smLow)) Snum += 8;
1135 else {while((tmask = tmask>>1)) Snum++; break;}
1136 } while((pmask = pmask >> 8));
1137
1138// See if the node passes muster
1139//
1140 STMutex.ReadLock();
1141 if ((nP = NodeTab[Snum]))
1142 { if (nP->isBad) nP = 0;
1143 else if (!Config.sched_RR && (nP->myLoad > Config.MaxLoad)) nP = 0;
1144 else if (!(selR.needNet & nP->hasNet)) nP = 0;
1145 if (nP)
1146 {if (isrw)
1147 if (nP->isNoStage || nP->DiskFree < nP->DiskMinF) nP = 0;
1148 else {nP->RefTotW++; nP->RefW++;}
1149 else {nP->RefTotR++; nP->RefR++;}
1150 }
1151 }
1152
1153// At this point either we have a node or we do not
1154//
1155 if (nP)
1156 {hlen = nP->netIF.GetName(hbuff, port, nType) + 1;
1157 nP->RefR++;
1158 STMutex.UnLock();
1159 return hlen != 1;
1160 }
1161 STMutex.UnLock();
1162 return 0;
1163}
1164
1165/******************************************************************************/
1166/* S e l F a i l */
1167/******************************************************************************/
1168
1169int XrdCmsCluster::SelFail(XrdCmsSelect &Sel, int rc)
1170{
1171//
1172 const char *etext, *Item = "file";
1173
1174 switch(rc)
1175 {case eExists: if (Sel.Opts & XrdCmsSelect::isMeta) Item = "directory";
1176 etext = "Unable to create %s; it already exists.";
1177 Sel.Resp.Port = kYR_RWConflict;
1178 break;
1179 case eROfs: etext = "Unable to modify %s; r/o copy already exists.";
1180 Sel.Resp.Port = kYR_RWConflict;
1181 break;
1182 case eDups: etext = "Unable to modify %s; multiple copies exist.";
1183 Sel.Resp.Port = kYR_RWConflict;
1184 break;
1185 case eNoRep: etext = "Unable to replicate %s; no new sites available.";
1186 Sel.Resp.Port = kYR_noReplicas;
1187 break;
1188 case eNoSel: if (Sel.Vec.hf & Sel.nmask)
1189 {etext = "Unable to access %s; eligible servers shunned.";
1190 if (Sel.Opts & XrdCmsSelect::isDir) Item = "directory";
1191 } else {
1192 if (Sel.Opts & XrdCmsSelect::Write)
1193 {etext = "Unable to write %s; r/w exports not found.";
1194 } else {
1195 etext = "Unable to access %s; it does not exist.";
1196 if (Sel.Opts & XrdCmsSelect::isDir) Item = "directory";
1197 }
1198 }
1199 Sel.Resp.Port = kYR_ENOENT;
1200 break;
1201 default: etext = "Unable to access %s; it does not exist.";
1202 Sel.Resp.Port = kYR_ENOENT;
1203 break;
1204 };
1205
1206 int n = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data), etext, Item);
1207 if (n < (int)sizeof(Sel.Resp.Data)) Sel.Resp.DLen = n+1;
1208 else Sel.Resp.DLen = sizeof(Sel.Resp.Data);
1209
1210 return EReplete;
1211}
1212
1213/******************************************************************************/
1214/* S p a c e */
1215/******************************************************************************/
1216
1218{
1219 XrdCmsNode *nP;
1220 SMask_t bmask;
1221 int i;
1222 bool doAll = !baseFS.isDFS();
1223
1224// Obtain a lock on the table and screen out peer nodes
1225//
1226 STMutex.ReadLock();
1227 bmask = smask & peerMask;
1228
1229// Run through the table getting space information
1230//
1231 for (i = 0; i <= STHi; i++)
1232 if ((nP = NodeTab[i]) && nP->isNode(bmask) && !(nP->isOffline))
1233 {if (doAll || !sData.Total)
1234 {sData.Total += nP->DiskTotal;
1235 sData.TotFr += nP->DiskFree;
1236 }
1237 if (nP->isRW & XrdCmsNode::allowsSS)
1238 {sData.sNum++;
1239 if (sData.sFree < nP->DiskFree)
1240 {sData.sFree = nP->DiskFree; sData.sUtil = nP->DiskUtil;}
1241 }
1242 if (nP->isRW & XrdCmsNode::allowsRW)
1243 {sData.wNum++;
1244 if (sData.wFree < nP->DiskFree)
1245 {sData.wFree = nP->DiskFree; sData.wUtil = nP->DiskUtil;
1246 sData.wMinF = nP->DiskMinF;
1247 }
1248 }
1249 }
1250 STMutex.UnLock();
1251}
1252
1253/******************************************************************************/
1254/* S t a t s */
1255/******************************************************************************/
1256
1257int XrdCmsCluster::Stats(char *bfr, int bln)
1258{
1259 static const char statfmt1[] = "<stats id=\"cms\">"
1260 "<role>%s</role></stats>";
1261 int mlen;
1262
1263// Check if actual length wanted
1264//
1265 if (!bfr) return sizeof(statfmt1) + 8;
1266
1267// Format the statistics (not much here for now)
1268//
1269 mlen = snprintf(bfr, bln, statfmt1, Config.myRType);
1270
1271 if ((bln -= mlen) <= 0) return 0;
1272 return mlen;
1273}
1274
1275/******************************************************************************/
1276/* S t a t t */
1277/******************************************************************************/
1278
1279int XrdCmsCluster::Statt(char *bfr, int bln)
1280{
1281 static const char statfmt0[] = "</stats>";
1282 static const char statfmt1[] = "<stats id=\"cmsm\">"
1283 "<role>%s</role><sel><t>%lld</t><r>%lld</r><w>%lld</w></sel>"
1284 "<node>%d";
1285 static const char statfmt2[] = "<stats id=\"%d\">"
1286 "<host>%s</host><role>%s</role>"
1287 "<run>%s</run><ref><r>%d</r><w>%d</w></ref>%s</stats>";
1288 static const char statfmt3[] = "<shr>%d<use>%d</use></shr>";
1289 static const char statfmt4[] = "</node>";
1290 static const char statfmt5[] =
1291 "<frq><add>%lld<d>%lld</d></add><rsp>%lld<m>%lld</m></rsp>"
1292 "<lf>%lld</lf><ls>%lld</ls><rf>%lld</rf><rs>%lld</rs></frq>";
1293
1294 static int AddFrq = (Config.RepStats & XrdCmsConfig::RepStat_frq);
1295 static int AddShr = (Config.RepStats & XrdCmsConfig::RepStat_shr)
1296 && Config.asMetaMan();
1297
1298 XrdCmsRRQ::Info Frq;
1299 XrdCmsSelected *sp;
1300 int mlen, tlen, n = 0;
1301 char shrBuff[80], stat[6], *stp;
1302 bool oksel;
1303
1304 class spmngr {
1305 public: XrdCmsSelected *sp;
1306
1307 spmngr() {sp = 0;}
1308 ~spmngr() {XrdCmsSelected *xsp;
1309 while((xsp = sp)) {sp = sp->next; delete xsp;}
1310 }
1311 } mngrsp;
1312
1313// Check if actual length wanted
1314//
1315 if (!bfr)
1316 {n = sizeof(statfmt0) +
1317 sizeof(statfmt1) + 12*3 + 3 + 3 +
1318 (sizeof(statfmt2) + 10*2 + 256 + 16) * STMax + sizeof(statfmt4);
1319 if (AddShr) n += sizeof(statfmt3) + 12;
1320 if (AddFrq) n += sizeof(statfmt4) + (10*8);
1321 return n;
1322 }
1323
1324// Get the statistics
1325//
1326 if (AddFrq) RRQ.Statistics(Frq);
1327 mngrsp.sp = sp = List(FULLMASK, LS_NULL, oksel);
1328
1329// Count number of nodes we have
1330//
1331 while(sp) {n++; sp = sp->next;}
1332 sp = mngrsp.sp;
1333
1334// Format the statistics
1335//
1336 long long lclTcnt = SelTcnt, lclRtot = SelRtot, lclWtot = SelWtot;
1337 mlen = snprintf(bfr, bln, statfmt1,
1338 Config.myRType, lclTcnt, lclRtot, lclWtot, n);
1339
1340 if ((bln -= mlen) <= 0) return 0;
1341 tlen = mlen; bfr += mlen; n = 0; *shrBuff = 0;
1342
1343 while(sp && bln > 0)
1344 {stp = stat;
1345 if (sp->Status & XrdCmsSelected::Offline) *stp++ = 'o';
1346 else if (sp->Status & XrdCmsSelected::Suspend) *stp++ = 's';
1347 else if (sp->Status & XrdCmsSelected::Disable) *stp++ = 'd';
1348 else *stp++ = 'a';
1349 if (sp->Status & XrdCmsSelected::isRW) *stp++ = 'w';
1350 if (sp->Status & XrdCmsSelected::NoStage) *stp++ = 'n';
1351 *stp = 0;
1352 if (AddShr) snprintf(shrBuff, sizeof(shrBuff), statfmt3,
1353 (sp->Share ? sp->Share : 100), sp->Shrin);
1354 mlen = snprintf(bfr, bln, statfmt2, n, sp->Ident,
1355 XrdCmsRole::Type(static_cast<XrdCmsRole::RoleID>(sp->RoleID)),
1356 stat, sp->RefTotR, sp->RefTotW, shrBuff);
1357 bfr += mlen; bln -= mlen; tlen += mlen;
1358 sp = sp->next; n++;
1359 }
1360
1361 if (bln <= (int)sizeof(statfmt4)) return 0;
1362 strcpy(bfr, statfmt4); mlen = sizeof(statfmt4) - 1;
1363 bfr += mlen; bln -= mlen; tlen += mlen;
1364
1365 if (AddFrq && bln > 0)
1366 {mlen = snprintf(bfr, bln, statfmt5, Frq.Add2Q, Frq.PBack, Frq.Resp,
1367 Frq.Multi, Frq.luFast, Frq.luSlow, Frq.rdFast, Frq.rdSlow);
1368 bfr += mlen; bln -= mlen; tlen += mlen;
1369 }
1370
1371// See if we overflowed. otherwise finish up
1372//
1373 if (sp || bln < (int)sizeof(statfmt0)) return 0;
1374 strcpy(bfr, statfmt0);
1375 return tlen + sizeof(statfmt0) - 1;
1376}
1377
1378/******************************************************************************/
1379/* P r i v a t e M e t h o d s */
1380/******************************************************************************/
1381/******************************************************************************/
1382/* c a l c D e l a y */
1383/******************************************************************************/
1384
1385XrdCmsNode *XrdCmsCluster::calcDelay(XrdCmsSelector &selR)
1386{
1387 if (!selR.nPick) {selR.delay = 0;
1388 selR.reason = (selR.xNoNet
1389 ? "no eligible servers reachable for"
1390 : "no eligible servers for");
1391 }
1392 else if (selR.xFull) {selR.delay = Config.DiskWT;
1393 selR.reason = "no eligible servers have space for";
1394 }
1395 else if (selR.xOvld) {selR.delay = Config.MaxDelay;
1396 selR.reason = "eligible servers overloaded for";
1397 }
1398 else if (selR.xSusp) {selR.delay = Config.SUSDelay;
1399 selR.reason = "eligible servers suspended for";
1400 }
1401 else if (selR.xOff) {selR.delay = Config.SUPDelay;
1402 selR.reason = "eligible servers offline for";
1403 }
1404 else {selR.delay = Config.SUPDelay;
1405 selR.reason = "server selection error for";
1406 }
1407 return (XrdCmsNode *)0;
1408}
1409
1410/******************************************************************************/
1411/* D r o p */
1412/******************************************************************************/
1413
1414// Warning: STMutex must be locked in write upon entry and the caller must
1415// release it if this method is called directily. Otherwise, the mutex
1416// will be obtained and released. Also, this method may only be called
1417// via Remove() either directly or via a deferred job scheduled by that
1418// method. This method actually deletes the node object.
1419
1420int XrdCmsCluster::Drop(int sent, int sinst, XrdCmsDrop *djp)
1421{
1422 EPNAME("Drop_Node")
1423 XrdCmsNode *nP;
1424 char hname[512];
1425
1426// If we are being called outside of a scheduled job, obtain the mutex
1427//
1428 if (djp) STMutex.WriteLock();
1429
1430// Make sure this node is the right one
1431//
1432 if (!(nP = NodeTab[sent]) || nP->Inst() != sinst)
1433 {if (nP && djp == nP->DropJob) {nP->DropJob = 0; nP->DropTime = 0;}
1434 if (djp) STMutex.UnLock();
1435 DEBUG(sent <<'.' <<sinst <<" cancelled.");
1436 return 0;
1437 }
1438
1439// Check if the drop has been rescheduled
1440//
1441 if (djp && time(0) < nP->DropTime)
1442 {Sched->Schedule((XrdJob *)djp, nP->DropTime);
1443 if (djp) STMutex.UnLock();
1444 return 1;
1445 }
1446
1447// Save the node name (don't want to hold a lock across a message)
1448//
1449 strlcpy(hname, nP->Ident, sizeof(hname));
1450
1451// Cleanup status
1452//
1453 NodeTab[sent] = 0;
1454 nP->isOffline = 1; // STMutex is locked in write mode
1455 nP->DropTime = 0;
1456 nP->DropJob = 0;
1457 nP->isBound = 0;
1458
1459// Remove node from the peer list (if it is one)
1460//
1461 if (nP->isPeer) {peerHost &= nP->NodeMask; peerMask = ~peerHost;}
1462
1463// Remove node entry from the alternate list and readjust the end pointer.
1464//
1465 if (nP->isMan)
1466 {memset((void *)&AltMans[sent*AltSize], (int)' ', AltSize);
1467 if (sent == AltMent)
1468 {AltMent--;
1469 while(AltMent >= 0 && NodeTab[AltMent]
1470 && !NodeTab[AltMent]->isMan) AltMent--;
1471 if (AltMent < 0) AltMend = AltMans;
1472 else AltMend = AltMans + ((AltMent+1)*AltSize);
1473 }
1474 }
1475
1476// Readjust STHi
1477//
1478 if (sent == STHi) while(STHi >= 0 && !NodeTab[STHi]) STHi--;
1479
1480// Invalidate any cached entries for this node
1481//
1482 if (nP->NodeMask) Cache.Drop(nP->NodeMask, sent, STHi);
1483
1484// We can now delete the node object if we were called via a job as we are on
1485// a different thread. Direct calls require that we schedule the deletion as
1486// it may take a long time if there are oustanding references to this node.
1487//
1488 if (djp) {STMutex.UnLock(); nP->Delete(STMutex);}
1489 else nP->DropJob = new XrdCmsDrop(nP);
1490
1491// Document the drop
1492//
1493 Say.Emsg("Drop_Node", hname, "dropped.");
1494 return 0;
1495}
1496
1497/******************************************************************************/
1498/* M u l t i p l e */
1499/******************************************************************************/
1500
1501int XrdCmsCluster::Multiple(SMask_t mVec)
1502{
1503 static const unsigned long long Left32 = 0xffffffff00000000LL;
1504 static const unsigned long long Right32 = 0x00000000ffffffffLL;
1505 static const unsigned long long Left16 = 0x00000000ffff0000LL;
1506 static const unsigned long long Right16 = 0x000000000000ffffLL;
1507 static const unsigned long long Left08 = 0x000000000000ff00LL;
1508 static const unsigned long long Right08 = 0x00000000000000ffLL;
1509 static const unsigned long long Left04 = 0x00000000000000f0LL;
1510 static const unsigned long long Right04 = 0x000000000000000fLL;
1511// 0 1 2 3 4 5 6 7 8 9 A B C D E F
1512 static const int isMult[16] = {0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1};
1513
1514 if (mVec & Left32) {if (mVec & Right32) return 1;
1515 else mVec = mVec >> 32LL;
1516 }
1517 if (mVec & Left16) {if (mVec & Right16) return 1;
1518 else mVec = mVec >> 16LL;
1519 }
1520 if (mVec & Left08) {if (mVec & Right08) return 1;
1521 else mVec = mVec >> 8LL;
1522 }
1523 if (mVec & Left04) {if (mVec & Right04) return 1;
1524 else mVec = mVec >> 4LL;
1525 }
1526 return isMult[mVec];
1527}
1528
1529/******************************************************************************/
1530/* m a x B i t s */
1531/******************************************************************************/
1532
1533bool XrdCmsCluster::maxBits(SMask_t mVec, int mbits)
1534{
1535 int count = 0;
1536
1537// Count bits. This is the fastest way assuming few bits are set
1538//
1539 while(mVec)
1540 {mVec &= (mVec - 1);
1541 count++;
1542 if (count >= mbits) return true;
1543 }
1544
1545// Indicate we have not reached the maximum bits set
1546//
1547 return false;
1548}
1549
1550/******************************************************************************/
1551/* R e c o r d */
1552/******************************************************************************/
1553
1554void XrdCmsCluster::Record(char *path, const char *reason, bool force)
1555{
1556 EPNAME("Record")
1557 static int msgcnt = 255;
1558 static XrdSysMutex mcMutex;
1559 int skipmsg;
1560
1561 DEBUG(reason <<path);
1562 mcMutex.Lock();
1563 msgcnt++; skipmsg = msgcnt & (force ? 0x0f : 0xff);
1564 mcMutex.UnLock();
1565
1566 if (!skipmsg) Say.Emsg(epname, "client deferred;", reason, path);
1567}
1568
1569/******************************************************************************/
1570/* S e l N o d e */
1571/******************************************************************************/
1572
1573int XrdCmsCluster::SelNode(XrdCmsSelect &Sel, SMask_t pmask, SMask_t amask)
1574{
1575 EPNAME("SelNode")
1576 const char *act=0;
1577 int affsel = 1, count = 0, isalt = 0, pass = 2;
1578 SMask_t mask;
1579 XrdCmsNode *nP = 0;
1580 XrdCmsSelector selR;
1581 XrdNetIF::ifType nType=(XrdNetIF::ifType)(Sel.Opts & XrdCmsSelect::ifWant);
1582
1583// Obtain the network we need for the client
1584//
1585 selR.needNet = XrdNetIF::Mask(nType);
1586
1587// Indicate whether or not stable selection is required
1588//
1589 if (!(Sel.Opts & XrdCmsSelect::Pack)) selR.selPack = 0;
1590 else {unsigned int theHash = (Sel.Opts & XrdCmsSelect::UseAH
1591 ? Sel.AltHash : Sel.Path.Hash);
1592 SMask_t sVec = pmask;
1593 for (count = 0; sVec; count++) sVec &= (sVec - 1);
1594 if (count > 1) selR.selPack = affsel = (theHash % count) + 1;
1595 else selR.selPack = 0;
1596 }
1597
1598// There is a difference bwteen needing space and needing r/w access. The former
1599// is needed when we will be writing data the latter for inode modifications.
1600//
1601 if (Sel.Opts & XrdCmsSelect::isMeta) selR.needSpace = 0;
1602 else selR.needSpace = (Sel.Opts & XrdCmsSelect::Write
1603 ? XrdCmsNode::allowsRW : 0);
1604
1605// Scan for a primary and alternate node (alternates do staging). At this
1606// point we omit all peer nodes as they are our last resort. Note that Selbyxxx
1607// returns the node unlocked but we have the global mutex so that is OK.
1608//
1609 STMutex.ReadLock();
1610 mask = pmask & peerMask;
1611 while(pass--)
1612 {if (mask)
1613 {nP = (Config.sched_RR || (Sel.Opts & XrdCmsSelect::UseRef)
1614 ? SelbyRef(mask,selR)
1615 : Config.sched_LoadR == 0 ? SelbyLoad(pmask,selR)
1616 : SelbyLoadR(pmask, selR));
1617 if (nP || (selR.nPick && selR.delay)
1618 || NodeCnt < Config.SUPCount) break;
1619 }
1620 mask = amask & peerMask; isalt = XrdCmsNode::allowsSS;
1621 if (!(Sel.Opts & XrdCmsSelect::isMeta)) selR.needSpace |= isalt;
1622 }
1623
1624// Produce affinity result trace
1625//
1626 if (Sel.Opts & XrdCmsSelect::Pack && nP)
1627 {TRACE(Redirect, "affinity " <<affsel <<'/' <<count <<'/'
1628 <<(int)selR.selPack <<(selR.selPack ? " go " : " ng ")
1629 <<nP->Name() <<' ' <<Sel.Path.Val);
1630 }
1631
1632// If we found an eligible node then dispatch the client to it. We will
1633// swap the global mutex for the node mutex to minimize interefrence.
1634//
1635 if (nP)
1636 {nP->g2nLock(STMutex);
1637 Sel.Resp.DLen = nP->netIF.GetName(Sel.Resp.Data, Sel.Resp.Port, nType);
1638 if (!Sel.Resp.DLen) {nP->UnLock(); return Unreachable(Sel, false);}
1639 Sel.Resp.DLen++; Sel.smask = nP->NodeMask;
1640
1641 // If a message is to be sent to the selected server, send it.
1642 //
1643 if (Sel.iovN && Sel.iovP) nP->Send(Sel.iovP, Sel.iovN);
1644
1645 // Do special post proccessing when any of:
1646 // a) isalt true: Secondary selection occurred
1647 // b) Create set: File creation will occur
1648 //
1649 if (isalt || (Sel.Opts & XrdCmsSelect::Create))
1651 if (Sel.Opts & XrdCmsSelect::noBind) act = " handling ";
1652 else Cache.AddFile(Sel, nP->NodeMask);
1653 }
1654
1655 // Determine what we are actually doing here
1656 //
1657 nP->UnLock();
1658 if (!act)
1659 {if (isalt) act = (Sel.iovN ? " staging " : " assigned ");
1660 else act = " serving ";
1661 }
1662 TRACE(Stage, Sel.Resp.Data <<act <<Sel.Path.Val);
1663 return 0;
1664 }
1665
1666// No node so check if we have a sufficient number to continue. Note that we
1667// do not forward to a peer unless we have a suffficient number of local nodes.
1668//
1669 if (!selR.delay && NodeCnt < Config.SUPCount)
1670 {STMutex.UnLock();
1671 Record(Sel.Path.Val, "insufficient number of nodes", true);
1672 return Config.SUPDelay;
1673 }
1674
1675// Return delay if we should avoid selecting a peer manager
1676//
1677 if (selR.delay && selR.delay < Config.PSDelay)
1678 {STMutex.UnLock();
1679 Record(Sel.Path.Val, selR.reason);
1680 return selR.delay;
1681 }
1682
1683// At this point, we attempt a peer node selection (choice of last resort). Note
1684// that we are still holding the global lock! If we find a peer node we will
1685// swap it with the node lock.
1686//
1687 if (Sel.Opts & XrdCmsSelect::Peers)
1688 {const char *reason1 = selR.reason;
1689 int delay1 = selR.delay;
1690 bool noNet = selR.xNoNet;
1691 if ((mask = (pmask | amask) & peerHost)) nP = SelbyCost(mask, selR);
1692 if (nP)
1693 {nP->g2nLock(STMutex);
1694 Sel.Resp.DLen = nP->netIF.GetName(Sel.Resp.Data,Sel.Resp.Port,nType);
1695 if (!Sel.Resp.DLen) {nP->UnLock(); return Unreachable(Sel, false);}
1696 Sel.Resp.DLen++; Sel.smask = nP->NodeMask;
1697 if (Sel.iovN && Sel.iovP) nP->Send(Sel.iovP, Sel.iovN);
1698 nP->UnLock();
1699 TRACE(Stage, "Peer " <<Sel.Resp.Data <<" handling " <<Sel.Path.Val);
1700 return 0;
1701 }
1702 if (!selR.delay)
1703 {selR.delay = delay1; selR.reason = reason1; selR.xNoNet = noNet;}
1704 }
1705
1706// At this point we don't need the global lock so let it go.
1707//
1708 STMutex.UnLock();
1709
1710// At this point we either don't have enough nodes or simply can't handle this
1711//
1712 if (selR.delay)
1713 {Record(Sel.Path.Val, selR.reason);
1714 return selR.delay;
1715 }
1716
1717// Return appropriate error message
1718//
1719 if (selR.xNoNet) return Unreachable(Sel, true);
1720 return Unuseable(Sel);
1721}
1722
1723/******************************************************************************/
1724/* R e f C o u n t */
1725/******************************************************************************/
1726
1727// This snippet of code occurrs often enough so that we make it a macro as we
1728// want to execute this inline.
1729//
1730#define RefCount(sP, sPMulti, NeedSpace) \
1731 if (NeedSpace) {sP->RefTotW++; sP->RefW++;} \
1732 else {sP->RefTotR++; sP->RefR++;} \
1733 if (sPMulti && sP->Share && !sP->Shrem--) \
1734 {sP->RefW += sP->Shrip; sP->RefR += sP->Shrip; \
1735 sP->Shrem = sP->Share; sP->Shrin++; \
1736 }
1737
1738/******************************************************************************/
1739/* S e l b y C o s t */
1740/******************************************************************************/
1741
1742// Cost selection is used only for peer node selection as peers do not
1743// report a load and handle their own scheduling.
1744
1745// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1746
1747XrdCmsNode *XrdCmsCluster::SelbyCost(SMask_t mask, XrdCmsSelector &selR)
1748{
1749 XrdCmsNode *np, *sp = 0;
1750 bool Multi = false;
1751
1752// Scan for a node (sp points to the selected one)
1753//
1754 selR.Reset(); SelTcnt++;
1755 for (int i = 0; i <= STHi; i++)
1756 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1757 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1758 selR.nPick++;
1759 if (np->isOffline) {selR.xOff = true; continue;}
1760 if (np->isBad) {selR.xSusp = true; continue;}
1761 if (selR.needSpace && np->isNoStage) {selR.xFull = true; continue;}
1762 if (!sp) sp = np;
1763 else{if (abs(sp->myCost - np->myCost) <= Config.P_fuzz)
1764 { if (selR.selPack)
1765 {if (--selR.selPack) sp=np;
1766 else break;
1767 }
1768 else if (selR.needSpace)
1769 {if (sp->RefW > (np->RefW+Config.DiskLinger))
1770 sp=np;
1771 }
1772 else if (sp->RefR > np->RefR) sp=np;
1773 }
1774 else if (sp->myCost > np->myCost) sp=np;
1775 Multi = true;
1776 }
1777 }
1778
1779// Check for overloaded node and return result
1780//
1781 if (!sp) return calcDelay(selR);
1782 RefCount(sp, Multi, selR.needSpace);
1783 return sp;
1784}
1785
1786/******************************************************************************/
1787/* S e l b y L o a d */
1788/******************************************************************************/
1789
1790// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1791
1792XrdCmsNode *XrdCmsCluster::SelbyLoad(SMask_t mask, XrdCmsSelector &selR)
1793{
1794 XrdCmsNode *np, *sp = 0;
1795 bool Multi = false, reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1796
1797// Scan for a node (preset possible, suspended, overloaded, full, and dead)
1798//
1799 selR.Reset(); SelTcnt++;
1800 for (int i = 0; i <= STHi; i++)
1801 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1802 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1803 selR.nPick++;
1804 if (np->isOffline) {selR.xOff = true; continue;}
1805 if (np->isBad) {selR.xSusp = true; continue;}
1806 if (np->myLoad > Config.MaxLoad) {selR.xOvld = true; continue;}
1807 if (selR.needSpace && (np->DiskFree < np->DiskMinF
1808 || (reqSS && np->isNoStage)))
1809 {selR.xFull = true; continue;}
1810 if (!sp) sp = np;
1811 else{if (selR.needSpace)
1812 {if (abs(sp->myMass - np->myMass) <= Config.P_fuzz)
1813 {if (sp->RefW > (np->RefW+Config.DiskLinger)) sp=np;}
1814 else if (sp->myMass > np->myMass) sp=np;
1815 } else {
1816 if (abs(sp->myLoad - np->myLoad) <= Config.P_fuzz)
1817 {if (selR.selPack)
1818 {if (--selR.selPack) sp=np;
1819 else break;
1820 }
1821 else if (sp->RefR > np->RefR) sp=np;
1822 }
1823 else if (sp->myLoad > np->myLoad) sp=np;
1824 }
1825 Multi = true;
1826 }
1827 }
1828
1829// Check for overloaded node and return result
1830//
1831 if (!sp) return calcDelay(selR);
1832 RefCount(sp, Multi, selR.needSpace);
1833 return sp;
1834}
1835
1836/******************************************************************************/
1837/* S e l b y L o a d R */
1838/******************************************************************************/
1839
1840// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1841
1842XrdCmsNode *XrdCmsCluster::SelbyLoadR(SMask_t mask, XrdCmsSelector &selR)
1843{
1844 static std::random_device rand_dev;
1845 static std::default_random_engine generator(rand_dev());
1846
1847 XrdCmsNode *np = nullptr, *sp = nullptr;
1848 bool reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1849
1850 // Scan for a node (preset possible, suspended, overloaded, full, and dead)
1851
1852 selR.Reset();
1853 SelTcnt++;
1854
1855 int totWeight = 0;
1856
1857 for (int i = 0; i <= STHi; ++i) {
1858 NodeWeight[i] = 0; // make node unselectable first
1859
1860 if (!((np = NodeTab[i]) && (np->NodeMask & mask)))
1861 continue;
1862
1863 if (!(selR.needNet & np->hasNet)) { selR.xNoNet = true; continue; }
1864
1865 selR.nPick++;
1866
1867 if (np->isOffline) { selR.xOff = true; continue; }
1868 if (np->isBad) { selR.xSusp = true; continue; }
1869 if (np->myLoad > Config.MaxLoad) { selR.xOvld = true; continue; }
1870
1871 if (selR.needSpace) {
1872 if (np->DiskFree < np->DiskMinF || (reqSS && np->isNoStage)) {
1873 selR.xFull = true;
1874 continue;
1875 }
1876 }
1877
1878 // If node passes filters, give it a weight
1879 totWeight += Config.P_fuzz + (100 - np->myLoad);
1880 NodeWeight[i] = totWeight;
1881 }
1882
1883 std::uniform_int_distribution<int> distr(1, totWeight);
1884 int selected = distr(generator);
1885
1886 for (int i = 0; i <= STHi; ++i) {
1887 if (NodeWeight[i] < selected)
1888 continue;
1889
1890 sp = NodeTab[i];
1891 break;
1892 }
1893
1894 return sp ? sp : calcDelay(selR);
1895}
1896
1897/******************************************************************************/
1898/* S e l b y R e f */
1899/******************************************************************************/
1900
1901// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1902
1903XrdCmsNode *XrdCmsCluster::SelbyRef(SMask_t mask, XrdCmsSelector &selR)
1904{
1905 XrdCmsNode *np, *sp = 0;
1906 bool Multi = false, reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1907
1908// Scan for a node (sp points to the selected one)
1909//
1910 selR.Reset(); SelTcnt++;
1911 for (int i = 0; i <= STHi; i++)
1912 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1913 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1914 selR.nPick++;
1915 if (np->isOffline) {selR.xOff = true; continue;}
1916 if (np->isBad) {selR.xSusp = true; continue;}
1917 if (selR.needSpace && (np->DiskFree < np->DiskMinF
1918 || (reqSS && np->isNoStage)))
1919 {selR.xFull = true; continue;}
1920 if (!sp) sp = np;
1921 else {Multi = true;
1922 if (selR.selPack)
1923 {if (--selR.selPack) sp=np;
1924 else break;
1925 }
1926 else if (selR.needSpace)
1927 {if (sp->RefW > (np->RefW+Config.DiskLinger)) sp=np;}
1928 else if (sp->RefR > np->RefR) sp=np;
1929 }
1930 }
1931
1932// Check for overloaded node and return result
1933//
1934 if (!sp) return calcDelay(selR);
1935 RefCount(sp, Multi, selR.needSpace);
1936 return sp;
1937}
1938
1939/******************************************************************************/
1940/* S e l D F S */
1941/******************************************************************************/
1942
1943int XrdCmsCluster::SelDFS(XrdCmsSelect &Sel, SMask_t amask,
1944 SMask_t &pmask, SMask_t &smask, int isRW)
1945{
1946 EPNAME("SelDFS");
1947 static const SMask_t allNodes(~0);
1948 int oldOpts, rc;
1949
1950// The first task is to find out if the file exists somewhere. If we are doing
1951// local queries, then the answer will be immediate. Otherwise, forward it.
1952//
1953 if ((Sel.Opts & XrdCmsSelect::Refresh) || !(rc = Cache.GetFile(Sel, amask)))
1954 {if (!baseFS.Local())
1955 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
1956 TRACE(Files, "seeking " <<Sel.Path.Val);
1957 Cache.AddFile(Sel, 0);
1958 if (Sel.Opts & XrdCmsSelect::Refresh)
1960 Cluster.Broadsend(amask, QReq.Hdr, Sel.Path.Val, Sel.Path.Len+1);
1961 return 0;
1962 }
1963 if ((rc = baseFS.Exists(Sel.Path.Val, -Sel.Path.Len)) < 0)
1964 {Cache.AddFile(Sel, 0);
1965 Sel.Vec.bf = Sel.Vec.pf = Sel.Vec.wf = Sel.Vec.hf = 0;
1966 } else {
1967 Sel.Vec.hf = amask; Sel.Vec.wf = (isRW ? amask : 0);
1968 oldOpts = Sel.Opts;
1969 if (rc != CmsHaveRequest::Pending) Sel.Vec.pf = 0;
1970 else {Sel.Vec.pf = amask; Sel.Opts |= XrdCmsSelect::Pending;}
1971 Cache.AddFile(Sel, allNodes);
1972 Sel.Opts = oldOpts;
1973 }
1974 }
1975
1976// Screen out online requests where the file is pending
1977//
1978 if (Sel.Opts & XrdCmsSelect::Online && Sel.Vec.pf)
1979 {pmask = smask = 0;
1980 return 1;
1981 }
1982
1983// If the file is to be written and the files exists then it can't be a new file
1984//
1985 if (isRW && Sel.Vec.hf)
1986 {if (Sel.Opts & XrdCmsSelect::NewFile) return SelFail(Sel,eExists);
1987 if (Sel.Opts & XrdCmsSelect::Trunc) smask = 0;
1988 return 1;
1989 }
1990
1991// Final verification that we have something to select
1992//
1993 if (!Sel.Vec.hf
1994 && (!isRW || !(Sel.Opts & (XrdCmsSelect::Trunc | XrdCmsSelect::NewFile))))
1995 return SelFail(Sel, eNoEnt);
1996 return 1;
1997}
1998
1999/******************************************************************************/
2000/* s e n d A L i s t */
2001/******************************************************************************/
2002
2003// Single entry at a time, protected by STMutex in write mode!
2004
2005void XrdCmsCluster::sendAList(XrdLink *lp)
2006{
2007 static CmsTryRequest Req = {{0, kYR_try, 0, 0}, 0};
2008 static int HdrSize = sizeof(Req.Hdr) + sizeof(Req.sLen);
2009 static char *AltNext = AltMans;
2010 static struct iovec iov[4] = {{(caddr_t)&Req, (size_t)HdrSize},
2011 {0, 0},
2012 {AltMans, 0},
2013 {(caddr_t)"\0", 1}};
2014 int dlen;
2015
2016// Calculate what to send
2017//
2018 AltNext = AltNext + AltSize;
2019 if (AltNext >= AltMend)
2020 {AltNext = AltMans;
2021 iov[1].iov_len = 0;
2022 iov[2].iov_len = dlen = AltMend - AltMans;
2023 } else {
2024 iov[1].iov_base = (caddr_t)AltNext;
2025 iov[1].iov_len = AltMend - AltNext;
2026 iov[2].iov_len = AltNext - AltMans;
2027 dlen = iov[1].iov_len + iov[2].iov_len;
2028 }
2029
2030// Complete the request (account for trailing null character)
2031//
2032 dlen++;
2033 Req.Hdr.datalen = htons(static_cast<unsigned short>(dlen+sizeof(Req.sLen)));
2034 Req.sLen = htons(static_cast<unsigned short>(dlen));
2035
2036// Send the list of alternates (rotated once)
2037//
2038 lp->Send(iov, 4, dlen+HdrSize);
2039}
2040
2041/******************************************************************************/
2042/* s e t A l t M a n */
2043/******************************************************************************/
2044
2045// Single entry at a time, protected by STMutex in write mode!
2046
2047void XrdCmsCluster::setAltMan(int snum, XrdLink *lp, int port)
2048{
2049 XrdNetAddr altAddr = *(lp->NetAddr());
2050 char *ap = &AltMans[snum*AltSize];
2051 int i;
2052
2053// Preset the buffer and pre-screen the port number
2054//
2055 if (!port || (port > 0x0000ffff)) port = Config.PortTCP;
2056 memset(ap, int(' '), AltSize);
2057
2058// First tr to use the hostname:port which may be too large (unlikely). Else
2059// Insert the ip address of this node into the list of nodes. We made sure that
2060// the size of he buffer was big enough so no need to check for overflow.
2061//
2062 altAddr.Port(port);
2063 if (Config.DoHnTry) i = altAddr.Format(ap, AltSize, XrdNetAddr::fmtName);
2064 else i = 0;
2065 if (!i) i=altAddr.Format(ap,AltSize,XrdNetAddr::fmtAddr,XrdNetAddr::prefipv4);
2066 ap[i] = ' ';
2067
2068// Compute new fence
2069//
2070 if (ap >= AltMend) {AltMend = ap + AltSize; AltMent = snum;}
2071}
2072
2073/******************************************************************************/
2074/* U n r e a c h a b l e */
2075/******************************************************************************/
2076
2077int XrdCmsCluster::Unreachable(XrdCmsSelect &Sel, bool none)
2078{
2080 const char *Amode = (Sel.Opts & XrdCmsSelect::Write ? "write" : "read");
2081 const char *Xmode = (Sel.Opts & XrdCmsSelect::Online ? "immediately " : "");
2082
2083 if (none)
2084 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
2085 "No servers are reachable via %s network to %s%s the file.",
2086 XrdNetIF::Name(nType), Xmode, Amode) + 1;
2087 } else {
2088 Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
2089 "Eligible server is unreachable via %s network to %s%s the file.",
2090 XrdNetIF::Name(nType), Xmode, Amode) + 1;
2091 }
2092 Sel.Resp.Port = kYR_ENETUNREACH;
2093 return EReplete;
2094}
2095
2096/******************************************************************************/
2097/* U n u s e a b l e */
2098/******************************************************************************/
2099
2100int XrdCmsCluster::Unuseable(XrdCmsSelect &Sel)
2101{
2102 const char *Amode = (Sel.Opts & XrdCmsSelect::Write ? "write" : "read");
2103 const char *Xmode = (Sel.Opts & XrdCmsSelect::Online ? "immediately " : "");
2104 const char *EType = (Sel.Opts & XrdCmsSelect::isDir ? "directory" : "file");
2105
2106 int n = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data),
2107 "No servers are available to %s%s the %s.",
2108 Xmode, Amode, EType);
2109 if (n < (int)sizeof(Sel.Resp.Data)) Sel.Resp.DLen = n+1;
2110 else Sel.Resp.DLen = sizeof(Sel.Resp.Data);
2111
2112 Sel.Resp.Port = kYR_ENOENT;
2113 return EReplete;
2114}
unsigned char kXR_char
Definition XPtypes.hh:65
void Usage(const char *msg)
#define DEBUG(x)
#define EPNAME(x)
#define RefCount(sP, sPMulti, NeedSpace)
#define QTRACE(act)
#define STMax
unsigned long long SMask_t
#define FULLMASK
#define stat(a, b)
Definition XrdPosix.hh:101
bool Debug
bool Exists
XrdOucString Path
struct myOpts opts
if(Avsz)
if(ec< 0) ec
size_t strlcpy(char *dst, const char *src, size_t sz)
#define TRACE(act, x)
Definition XrdTrace.hh:63
int Exists(XrdCmsRRData &Arg, XrdCmsPInfo &Who, int noLim=0)
static int Present(const char *hName, XrdOucTList *bList=0, char *rbuff=0, int rblen=0)
int GetFile(XrdCmsSelect &Sel, SMask_t mask)
int AddFile(XrdCmsSelect &Sel, SMask_t mask)
static XrdCmsClustID * AddID(const char *cID)
static SMask_t Mask(const char *cID)
XrdCmsNode * RemNode(XrdCmsNode *nP)
static XrdCmsClustID * Find(const char *cID)
bool AddNode(XrdCmsNode *nP, bool isMan)
SMask_t getMask(const XrdNetAddr *addr)
void Space(XrdCms::SpaceData &sData, SMask_t smask)
int Broadsend(SMask_t smask, XrdCms::CmsRRHdr &Hdr, void *Data, int Dlen)
int Select(XrdCmsSelect &Sel)
int Locate(XrdCmsSelect &Sel)
static const int EReplete
void ResetRef(SMask_t smask, bool isLocked=false)
SMask_t Broadcast(SMask_t, const struct iovec *, int, int tot=0)
XrdCmsSelected * List(SMask_t mask, CmsLSOpts opts, bool &oksel)
static const int RetryErr
XrdCmsNode * Add(XrdLink *lp, int dport, int Status, int sport, const char *theNID, const char *theIF)
static const int NotFound
static const int Wait4CBk
void Remove(XrdCmsNode *theNode)
int Stats(char *bfr, int bln)
virtual void BlackList(XrdOucTList *blP)
friend class XrdCmsDrop
int Statt(char *bfr, int bln)
static const int RepStat_shr
static const int RepStat_frq
XrdCmsDrop(int nid, int inst)
XrdCmsNode * nodeP
XrdCmsDrop(XrdCmsNode *nP)
unsigned int Hash
Definition XrdCmsKey.hh:53
char * Val
Definition XrdCmsKey.hh:52
short Len
Definition XrdCmsKey.hh:54
static const char allowsRW
Definition XrdCmsNode.hh:84
char * Ident
Definition XrdCmsNode.hh:61
void Delete(XrdSysRWLock &gMutex)
void n2gLock(XrdSysRWLock &gMutex, bool rdlock=false)
int Send(const char *buff, int blen=0)
static const char allowsSS
Definition XrdCmsNode.hh:85
void unRef()
void Lock()
static const char isDisabled
Definition XrdCmsNode.hh:80
int isNode(SMask_t smask)
SMask_t Mask()
char * Name()
char isOffline
Definition XrdCmsNode.hh:64
void g2nLock(XrdSysRWLock &gMutex)
static const char isSuspend
Definition XrdCmsNode.hh:81
unsigned int DiskTotal
Definition XrdCmsNode.hh:87
int ID(int &INum)
char isNoStage
Definition XrdCmsNode.hh:66
void Disc(const char *reason=0, int needLock=1)
void UnLock()
void setName(XrdLink *lnkp, const char *theIF, int port)
static const char isDoomed
Definition XrdCmsNode.hh:82
static const char isBlisted
Definition XrdCmsNode.hh:79
SMask_t ssvec
SMask_t rovec
SMask_t rwvec
SMask_t rwVec
Definition XrdCmsRRQ.hh:59
static const char * Type(RoleID rid)
Definition XrdCmsRole.hh:78
struct XrdCmsSelect::@372246154012353073336061155127145056202367365025 Resp
XrdCmsRRQInfo * InfoP
XrdCmsKey Path
struct XrdCmsSelect::@234326324321301042052133314024110005315140201317 Vec
static const int IdentSize
char Ident[IdentSize]
XrdCmsSelected * next
const char * reason
XrdJob(const char *desc="")
Definition XrdJob.hh:51
static const int prefipv4
Use if mapped IPV4 actual format.
int Format(char *bAddr, int bLen, fmtUse fmtType=fmtAuto, int fmtOpts=0)
@ fmtAddr
Address using suitable ipv4 or ipv6 format.
@ fmtName
Hostname if it is resolvable o/w use fmtAddr.
int Port(int pNum=-1)
char Mask()
Definition XrdNetIF.hh:242
static const char * Name(ifType ifT)
Definition XrdNetIF.hh:266
int Port()
Definition XrdNetIF.hh:276
bool HasDest(ifType ifT=PublicV6)
Definition XrdNetIF.hh:221
int GetName(const char *&name, ifType ifT=PublicV6)
Definition XrdNetIF.hh:102
int GetDest(char *dest, int dlen, ifType ifT=PublicV6, bool prefn=false)
Definition XrdNetIF.cc:389
ifType
The enum that is used to index into ifData to get appropriate interface.
Definition XrdNetIF.hh:64
@ PrivateIF
Definition XrdNetIF.hh:68
static int Pack(struct iovec **, const char *, unsigned short &buff)
Definition XrdOucPup.cc:52
static void Snooze(int seconds)
XrdCmsRRQ RRQ
Definition XrdCmsRRQ.cc:55
XrdCmsCache Cache
static const unsigned char kYR_Version
Definition YProtocol.hh:80
kXR_unt16 datalen
Definition YProtocol.hh:86
@ kYR_ENETUNREACH
Definition YProtocol.hh:158
@ kYR_noReplicas
Definition YProtocol.hh:164
@ kYR_ENOENT
Definition YProtocol.hh:150
@ kYR_RWConflict
Definition YProtocol.hh:163
static const int CMS_isSuper
static const int CMS_noStage
kXR_char modifier
Definition YProtocol.hh:85
XrdScheduler * Sched
XrdCmsCluster Cluster
XrdCmsBaseFS baseFS
XrdSysError Say
XrdCmsState CmsState
static const int CMS_isMan
XrdCmsConfig Config
@ kYR_state
Definition YProtocol.hh:110
@ kYR_usage
Definition YProtocol.hh:116
static const int CMS_isPeer
static const int CMS_Suspend
long long luSlow
Definition XrdCmsRRQ.hh:139
long long rdSlow
Definition XrdCmsRRQ.hh:141
long long Resp
Definition XrdCmsRRQ.hh:136
long long luFast
Definition XrdCmsRRQ.hh:138
long long Add2Q
Definition XrdCmsRRQ.hh:134
long long Multi
Definition XrdCmsRRQ.hh:137
long long rdFast
Definition XrdCmsRRQ.hh:140
long long PBack
Definition XrdCmsRRQ.hh:135