/* -*- Mode:C++; c-basic-offset:8; tab-width:8; indent-tabs-mode:t -*- */ /* * Copyright (c) 1990-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Here is one set of parameters from one of Sally's simulations * (this is from tcpsim, the older simulator): * * ed [ q_weight=0.002 thresh=5 linterm=30 maxthresh=15 * mean_pktsize=500 dropmech=random-drop queue-size=60 * plot-file=none bytes=false doubleq=false dqthresh=50 * wait=true ] * * 1/"linterm" is the max probability of dropping a packet. * There are different options that make the code * more messy that it would otherwise be. For example, * "doubleq" and "dqthresh" are for a queue that gives priority to * small (control) packets, * "bytes" indicates whether the queue should be measured in bytes * or in packets, * "dropmech" indicates whether the drop function should be random-drop * or drop-tail when/if the queue overflows, and * the commented-out Holt-Winters method for computing the average queue * size can be ignored. * "wait" indicates whether the gateway should wait between dropping * packets. * * adapted to REM 01/2000 by lmwang@cs.princeton.edu */ #ifndef lint static const char rcsid[] = "@(#) $Header: /usr/src/mash/repository/vint/ns-2/red.cc,v 1.34 1999/01/07 19:01:57 sfloyd Exp $ (LBL)"; #endif #include #include #include #include "template.h" #include "random.h" #include "flags.h" #include "delay.h" #include "rem.h" #include "tcp.h" #define max(a,b) ((a) > (b) ? (a):(b)) static class REMClass : public TclClass { public: REMClass() : TclClass("Queue/REM") {} TclObject* create(int, const char*const*) { return (new REMQueue); } } class_rem; REMQueue::REMQueue() : link_(NULL), bcount_(0), tchan_(0), idle_(1), aggr_timer(this) { bind_bool("bytes_", &emp_.bytes); // boolean: use bytes? bind_bool("queue-in-bytes_", &qib_); // boolean: q in bytes? bind("mean_pktsize_", &emp_.mean_pktsize); // avg pkt size bind("q_weight_", &emp_.q_w); // for EWMA bind("rate_count_", &emp_.rate_count); // count input bind("rate_sample_", &emp_.rate_timeout); // interval to cal rate bind("delta_", &emp_.delta); // weight for input rate bind("gamma_", &emp_.gamma); // stepsize of price bind("phi_", &emp_.phi); // base for price bind("alpha_l_", &emp_.alpha_l); // weight of buf for price bind("price_", &emv_.p_l); // price of the link bind("marking_prob_", &emv_.m_l); // marking problablity bind_bool("drop-tail_", &drop_tail_); // drop last pkt bind_bool("drop-front_", &drop_front_); // drop first pkt bind_bool("drop-rand_", &drop_rand_); // drop pkt at random bind("ave_", &emv_.v_ave); // average queue sie bind("prob1_", &emv_.v_prob1); // dropping probability bind("curq_", &curq_); // current queue size q_ = new PacketQueue(); // underlying queue pq_ = q_; reset(); #ifdef notdef print_edp(); print_edv(); #endif } void REMQueue::reset() { /* * Compute the "packet time constant" if we know the * link bandwidth. The ptc is the max number of (avg sized) * pkts per second which can be placed on the link. * The link bw is given in bits/sec, so scale mean psize * accordingly. */ if (link_) emp_.ptc = link_->bandwidth() / (8. * emp_.mean_pktsize); emv_.v_ave = 0.0; emv_.v_prob1 = 0.0; emv_.m_l = 0.0; emv_.p_l = 0.0; emv_.aggr_input = 0.0; idle_ = 1; if (&Scheduler::instance() != NULL) idletime_ = last_aggr_time = Scheduler::instance().clock(); else idletime_ = last_aggr_time = 0.0; /* sched not instantiated yet */ Queue::reset(); aggr_bytes = 0; aggr_count = 0; bcount_ = 0; set_rate_timer(); } /* * Set rate calculation timer By calling resched(), * it does not matter whether the timer was already running. */ void REMQueue::set_rate_timer() { if (&Scheduler::instance() != NULL) last_aggr_time = Scheduler::instance().clock(); else last_aggr_time = 0.0; aggr_timer.resched(emp_.rate_timeout); } /* * Compute the average queue size. * The code contains two alternate methods for this, the plain EWMA * and the Holt-Winters method. * nqueued can be bytes or packets */ void REMQueue::run_estimator(int nqueued, int m) { float f, f_sl, f_old; f = emv_.v_ave; f_sl = emv_.v_slope; #define REM_EWMA #ifdef REM_EWMA while (--m >= 1) { f_old = f; f *= 1.0 - emp_.q_w; } f_old = f; f *= 1.0 - emp_.q_w; f += emp_.q_w * nqueued; #endif #ifdef REM_HOLT_WINTERS while (--m >= 1) { f_old = f; f += f_sl; f *= 1.0 - emp_.q_w; f_sl *= 1.0 - 0.5 * emp_.q_w; f_sl += 0.5 * emp_.q_w * (f - f_old); } f_old = f; f += f_sl; f *= 1.0 - emp_.q_w; f += emp_.q_w * nqueued; f_sl *= 1.0 - 0.5 * emp_.q_w; f_sl += 0.5 * emp_.q_w * (f - f_old); #endif emv_.v_ave = f; emv_.v_slope = f_sl; } /* * Return the next packet in the queue for transmission. */ Packet* REMQueue::deque() { Packet *p; p = q_->deque(); if (p != 0) { idle_ = 0; bcount_ -= ((hdr_cmn*)p->access(off_cmn_))->size(); } else { idle_ = 1; // deque() may invoked by Queue::reset at init // time (before the scheduler is instantiated). // deal with this case if (&Scheduler::instance() != NULL) idletime_ = Scheduler::instance().clock(); else idletime_ = 0.0; } return (p); } /* * should the packet be marked due to a probabilistic drop? */ int REMQueue::random_mark(Packet* pkt) { //hdr_cmn* ch = (hdr_cmn*)pkt->access(off_cmn_); //hdr_tcp *tcph = hdr_tcp::access(pkt); double p = emv_.m_l; if (emv_.v_prob1 > 1.0) emv_.v_prob1 = 1.0; if ( p > 1.0) p = 1.0; emv_.m_l = p; // drop probability is computed, pick random number and act double u = Random::uniform(); hdr_flags* hf = (hdr_flags*)pickPacketForECN(pkt)->access(off_flags_); if (u <= p) { // MARK if (hf->ect()) { //printf("****mark p %d u %.4f p: %.4f\n", (tcph->seqno()), u, p); hf->ce() = 1; // mark Congestion Experienced bit return (0); // no drop } } else hf->ce() = 0; return (0); // no Mark } /* * Pick packet for early congestion notification (ECN). This packet is then * marked. Having a separate function do this is convenient for * supporting derived classes that use the standard REM algorithm to compute * average queue size but use a different algorithm for choosing the packet for * ECN notification. */ Packet* REMQueue::pickPacketForECN(Packet* pkt) { return pkt; /* pick the packet that just arrived */ } /* * Pick packet to drop. Having a separate function do this is convenient for * supporting derived classes that use the standard RED algorithm to compute * average queue size but use a different algorithm for choosing the victim. */ Packet* REMQueue::pickPacketToDrop() { int victim; if (drop_front_) victim = min(1, q_->length()-1); else if (drop_rand_) victim = Random::integer(q_->length()); else /* default is drop_tail_ */ victim = q_->length() - 1; return(q_->lookup(victim)); } /* * Receive a new packet arriving at the queue. * The average queue size is computed. If the average size * exceeds the threshold, then the dropping probability is computed, * and the newly-arriving packet is dropped with that probability. * The packet is also dropped if the maximum queue size is exceeded. * * "Forced" drops mean a packet arrived when the underlying queue was * full or when the average q size exceeded maxthresh. * "Unforced" means a REM random drop. * * For forced drops, either the arriving packet is dropped or one in the * queue is dropped, depending on the setting of drop_tail_. * For unforced drops, the arriving packet is always the victim. */ #define DTYPE_NONE 0 /* ok, no drop/marking */ #define DTYPE_FORCED 1 /* a "forced" drop */ #define DTYPE_UNFORCED 2 /* an "unforced" (random) marking */ void REMQueue::enque(Packet* pkt) { /* * if we were idle, we pretend that m packets arrived during * the idle period. m is set to be the ptc times the amount * of time we've been idle for */ int m = 0; if (idle_) { double now = Scheduler::instance().clock(); /* To account for the period when the queue was empty. */ idle_ = 0; m = int(emp_.ptc * (now - idletime_)); } /* * Run the estimator with either 1 new packet arrival, or with * the scaled version above [scaled by m due to idle time] * (bcount_ maintains the byte count in the underlying queue). * If the underlying queue is able to delete packets without * us knowing, then bcount_ will not be maintained properly! */ run_estimator(qib_ ? bcount_ : q_->length(), m + 1); /* * count and count_bytes keeps a tally of arriving traffic * that has not been dropped (i.e. how long, in terms of traffic, * it has been since the last early drop) */ hdr_cmn* ch = (hdr_cmn*)pkt->access(off_cmn_); aggr_bytes += ch->size(); aggr_count ++; /* should only depend on timer based measurement, otherwise unit is not easy to be fixed straightforwardly if(aggr_count >= emp_.rate_count){ updatePrice(); set_rate_timer(); } */ int droptype = DTYPE_UNFORCED; int qlen = qib_ ? bcount_ : q_->length(); int qlim = qib_ ? (qlim_ * emp_.mean_pktsize) : qlim_; curq_ = qlen; // helps to trace queue during arrival, if enabled if (qlen >= qlim) { // see if we've exceeded the queue size droptype = DTYPE_FORCED; } if (droptype == DTYPE_UNFORCED) { /* pick packet for ECN, which is marking in this case */ Packet *pkt_to_mark = pickPacketForECN(pkt); q_->enque(pkt); bcount_ += ch->size(); pkt = pkt_to_mark; random_mark(pkt); } else { /* forced drop, or not a drop: first enqueue pkt */ q_->enque(pkt); bcount_ += ch->size(); /* drop a packet if we were told to */ if (droptype == DTYPE_FORCED) { /* drop random victim or last one */ pkt = pickPacketToDrop(); q_->remove(pkt); bcount_ -= ((hdr_cmn*)pkt->access(off_cmn_))->size(); drop(pkt); } } return; } int REMQueue::command(int argc, const char*const* argv) { Tcl& tcl = Tcl::instance(); if (argc == 2) { if (strcmp(argv[1], "reset") == 0) { reset(); return (TCL_OK); } } else if (argc == 3) { // attach a file for variable tracing if (strcmp(argv[1], "attach") == 0) { int mode; const char* id = argv[2]; tchan_ = Tcl_GetChannel(tcl.interp(), (char*)id, &mode); if (tchan_ == 0) { tcl.resultf("REM: trace: can't attach %s for writing", id); return (TCL_ERROR); } return (TCL_OK); } // tell REM about link stats if (strcmp(argv[1], "link") == 0) { LinkDelay* del = (LinkDelay*)TclObject::lookup(argv[2]); if (del == 0) { tcl.resultf("REM: no LinkDelay object %s", argv[2]); return(TCL_ERROR); } // set ptc now link_ = del; emp_.ptc = link_->bandwidth() / (8. * emp_.mean_pktsize); return (TCL_OK); } } if (!strcmp(argv[1], "packetqueue-attach")) { delete q_; if (!(q_ = (PacketQueue*) TclObject::lookup(argv[2]))) return (TCL_ERROR); else { pq_ = q_; return (TCL_OK); } } return (Queue::command(argc, argv)); } /* * Routine called by TracedVar facility when variables change values. * Currently used to trace values of avg queue size, drop probability, * and the instantaneous queue size seen by arriving packets. * Note that the tracing of each var must be enabled in tcl to work. */ void REMQueue::trace(TracedVar* v) { char wrk[500], *p; if (((p = strstr(v->name(), "ave")) == NULL) && ((p = strstr(v->name(), "prob")) == NULL) && ((p = strstr(v->name(), "curq")) == NULL) && ((p = strstr(v->name(), "price")) == NULL)&& ((p = strstr(v->name(), "marking_prob"))) == NULL) { fprintf(stderr, "REM:unknown trace var %s\n", v->name()); return; } if (tchan_) { int n; double t = Scheduler::instance().clock(); // XXX: be compatible with nsv1 REM trace entries if (*p == 'c') { sprintf(wrk, "Q %g %d", t, int(*((TracedInt*) v))); } else { sprintf(wrk, "%c %g %g", *p, t, double(*((TracedDouble*) v))); } n = strlen(wrk); wrk[n] = '\n'; wrk[n+1] = 0; (void)Tcl_Write(tchan_, wrk, n+1); } return; } void REMQueue:: updatePrice(){ /* measure input rate */ double t = Scheduler::instance().clock(); double t_intv = t - last_aggr_time; double buf = q_->length(); new_aggr_input = (aggr_bytes / t_intv) / emp_.mean_pktsize; emv_.aggr_input = ( 1 - emp_.delta) * emv_.aggr_input + emp_.delta * new_aggr_input; /******************* !!! Time Unit Comments!!! ******************/ /* In p = p' + gamma *(alpha * buf + x - c) * p, p' has an unit of seconds or milliseconds or ... * x, c has an unit of pkts/s by default * however, the sampling interval determines the unit of alpha! * thus alpha 's unit should take the form of /(sample_interval) * moreover, alpha*buf, x and c should have the same unit * so, x, c need to be converted to pkts/(sample_interval) * gammma has an unit of (sample_interval * sample_interval) / pkts, * which yielded that: * gamma *(alpha * buf + x - c) has the unit of sample_interval. * consequently p and p' also have the unit of sample_interval, which * in this case is milliseconds. * * IMPLICATIONS: the source client TCPNewVegas needs to anticipate * price conveyed by the packets to be in ms. * ALTERNATIVE: change gamma to have unit of * (seconds * sample_interval) / pkts, * new_gamma = gamma /1000 in this case, * thus the price conveyed by the packets will be in * the unit of seconds, but in that case the * marking probablity might be too small to be expressed * by a few packets, and it will take longer for the * source to figure out the real price using current * phi_. However, changing phi_' to power(phi_, 1000) * will solve it effectively. * ***************************************************************/ double in_ = emv_.aggr_input * emp_.rate_timeout; double capacity_ = emp_.ptc * emp_.rate_timeout; /* to have price in second unit, use following formular instead: double gamma = emp_.gamma * emp_.rate_timeout; double new_p = emv_.p_l + gamma * (emp_.alpha_l * buf + in_ - capacity_); */ /* update price and marking probablity */ double new_p = emv_.p_l + emp_.gamma * (emp_.alpha_l * buf + in_ - capacity_); // double old_p = emv_.p_l; // printf(" %.6f old p_l: %g new p: %.4f bytes: %d time %.4f\n", t, old_p, new_p, aggr_bytes, t_intv); // printf(" last_time : %.6f capactiy %.4f link : avg queue len %.4f \n", last_aggr_time, emp_.ptc, buf); emv_.p_l = max(new_p, 0); emv_.m_l = 1 - pow(emp_.phi, - emv_.p_l); // print_emv(); aggr_bytes = 0; aggr_count = 0; /* sched not instantiated yet */ } void REMQueue:: timeout(){ updatePrice(); /* schedule next timeout */ set_rate_timer(); } /* for debugging help */ void REMQueue::print_emp() { printf("mean_pktsz: %d\n", emp_.mean_pktsize); printf("delta: %.4f, gamma: %.4f, alpha_l: %.4f, phi: %.4f\n", emp_.delta, emp_.gamma, emp_.alpha_l, emp_.phi); printf("qw: %f, ptc: %f\n", emp_.q_w, emp_.ptc); printf("qlim: %d, idletime: %f\n", qlim_, idletime_); printf("=========\n"); } void REMQueue::print_emv() { double price = emv_.p_l, prob = emv_.m_l; printf("Input_rate: %.4f\n", emv_.aggr_input); printf("Link Price: %g, Marking Probability: %g\n", price, prob); printf("=========\n"); } void AggrRateTimer::expire(Event *e){ q_-> timeout(); }