Package wsgitools :: Module filters
[hide private]
[frames] | no frames]

Source Code for Module wsgitools.filters

  1  """ 
  2  This module contains a generic way to create middlewares that filter data. 
  3  The work is mainly done by the L{WSGIFilterMiddleware} class. One can write 
  4  filters by extending the L{BaseWSGIFilter} class and passing this class 
  5  (not an instance) to the L{WSGIFilterMiddleware} constructor. 
  6  """ 
  7   
  8  __all__ = [] 
  9   
 10  import sys 
 11  import time 
 12  import gzip 
 13  try: 
 14      import io 
 15  except ImportError: 
 16      try: 
 17          import cStringIO as io 
 18      except ImportError: 
 19          import StringIO as io 
 20   
 21  try: 
 22      next 
 23  except NameError: 
24 - def next(it):
25 return it.next()
26 27 __all__.append("CloseableIterator")
class CloseableIterator:
    """Concatenating iterator with close attribute.

    The iterators handed to the constructor are consumed one after another.
    The object implements both the Python 3 (C{__next__}) and the Python 2
    (C{next}) iterator protocol.
    """

    def __init__(self, close_function, *iterators):
        """If close_function is not C{None}, it will be the C{close} attribute
        of the created iterator object. Further parameters specify iterators
        that are to be concatenated.
        @type close_function: a function or C{None}
        """
        if close_function is not None:
            self.close = close_function
        self.iterators = list(map(iter, iterators))

    def __iter__(self):
        """iterator interface
        @rtype: gen()
        """
        return self

    def __next__(self):
        """iterator interface (Python 3)

        Returns the next item of the first iterator that still has items.
        @raise StopIteration: when all iterators are exhausted
        """
        # Loop instead of recursing so that a long run of exhausted iterators
        # cannot exceed the recursion limit.
        while self.iterators:
            try:
                return next(self.iterators[0])
            except StopIteration:
                self.iterators.pop(0)
        raise StopIteration

    def next(self):
        """iterator interface (Python 2), see L{__next__}"""
        return self.__next__()
53 54 __all__.append("CloseableList")
class CloseableList(list):
    """List subclass that may carry a C{close} attribute."""

    def __init__(self, close_function, *args):
        """Build the list from C{args} (handed unchanged to the C{list}
        constructor) and, unless close_function is C{None}, expose it as the
        C{close} attribute of the new object.
        @type close_function: a function or C{None}
        """
        list.__init__(self, *args)
        if close_function is not None:
            self.close = close_function

    def __iter__(self):
        """iterator interface

        The returned L{CloseableIterator} carries the same C{close} attribute
        as this list (if there is one).
        """
        closer = getattr(self, "close", None)
        return CloseableIterator(closer, list.__iter__(self))
70 71 __all__.append("BaseWSGIFilter")
class BaseWSGIFilter:
    """Generic WSGI filter class to be used with L{WSGIFilterMiddleware}.

    One filter object is created per request. The hooks are used as follows:
     - the environment goes through L{filter_environ},
     - exception information goes through L{filter_exc_info},
     - the status string goes through L{filter_status},
     - each C{(header, value)} tuple goes through L{filter_header},
     - the resulting header list goes through L{filter_headers},
     - every piece of body data goes through L{filter_data},
     - L{append_data} may contribute additional body data,
     - L{handle_close} is invoked once the request has finished.

    Every hook is the identity by default, so handing the L{BaseWSGIFilter}
    class itself to a L{WSGIFilterMiddleware} leaves requests untouched.
    """

    def __init__(self):
        """Does nothing and may safely be overridden. It only documents that
        filter classes must be constructible without additional parameters.
        """

    def filter_environ(self, environ):
        """Receives the environment dict passed to the wsgi application and
        must return a C{dict}. By default the very same dict is returned.
        @type environ: {str: str}
        @rtype: {str: str}
        """
        return environ

    def filter_exc_info(self, exc_info):
        """Receives the third argument given to C{start_response} by the
        wrapped wsgi application, that is either C{None} or an exception
        tuple. Either C{None} or such a tuple must be returned."""
        return exc_info

    def filter_status(self, status):
        """Receives the status string given as first argument to
        C{start_response} by the wrapped wsgi application and must return a
        valid HTTP status string.
        @type status: str
        @rtype: str
        """
        return status

    def filter_header(self, headername, headervalue):
        """Invoked for each C{(headername, headervalue)} tuple of the second
        argument given to C{start_response} by the wrapped wsgi application.
        Must return such a tuple, or C{None} to discard the header.
        @type headername: str
        @type headervalue: str
        @rtype: (str, str)
        """
        return headername, headervalue

    def filter_headers(self, headers):
        """Receives the list of headers given as second argument to
        C{start_response} by the wrapped wsgi application and must return
        such a list.
        @type headers: [(str, str)]
        @rtype: [(str, str)]
        """
        return headers

    def filter_data(self, data):
        """Invoked for each string that is written through the C{write}
        callable or returned from the wrapped wsgi application. Must return a
        string.
        @type data: str
        @rtype: str
        """
        return data

    def append_data(self):
        """Hook for appending data to the response. Must return a list of
        strings or another iterable yielding strings; the default is an empty
        list.
        @rtype: gen([str])
        """
        return list()

    def handle_close(self):
        """Invoked after the request has finished."""
        return None
149 150 __all__.append("WSGIFilterMiddleware")
class WSGIFilterMiddleware:
    """This wsgi middleware can be used with specialized L{BaseWSGIFilter}s to
    modify wsgi requests and/or responses."""

    def __init__(self, app, filterclass):
        """
        @param app: is a wsgi application.
        @type filterclass: L{BaseWSGIFilter}s subclass
        @param filterclass: is a subclass of L{BaseWSGIFilter} or some class
                that implements the interface."""
        self.app = app
        self.filterclass = filterclass

    def __call__(self, environ, start_response):
        """wsgi interface

        Creates one filter object for the request and routes the environment,
        the C{start_response} arguments and all body data through it.
        @type environ: {str: str}
        @rtype: gen([str])
        """
        assert isinstance(environ, dict)
        reqfilter = self.filterclass()
        environ = reqfilter.filter_environ(environ)

        def modified_start_response(status, headers, exc_info=None):
            assert isinstance(status, str)
            assert isinstance(headers, list)
            exc_info = reqfilter.filter_exc_info(exc_info)
            status = reqfilter.filter_status(status)
            filtered = (reqfilter.filter_header(h, v) for h, v in headers)
            # filter_header returns None for headers that are to be dropped.
            headers = reqfilter.filter_headers([h for h in filtered if h])
            write = start_response(status, headers, exc_info)
            def modified_write(data):
                write(reqfilter.filter_data(data))
            return modified_write

        # Keep the object returned by the application under its own name: it
        # is this object (not an iterator derived from it) whose close method
        # has to be invoked at the end of the request.
        app_iter = self.app(environ, modified_start_response)
        assert hasattr(app_iter, "__iter__")

        def modified_close():
            try:
                reqfilter.handle_close()
            finally:
                # Close the application's iterable even if the filter's
                # handle_close raised.
                close = getattr(app_iter, "close", None)
                if close is not None:
                    close()

        if isinstance(app_iter, list):
            return CloseableList(modified_close,
                                 list(map(reqfilter.filter_data, app_iter))
                                 + list(reqfilter.append_data()))
        chunks = iter(app_iter)
        def late_append_data():
            """Invoke C{reqfilter.append_data()} after C{filter_data()} has seen
            all data."""
            for data in reqfilter.append_data():
                yield data
        return CloseableIterator(modified_close,
                                 (reqfilter.filter_data(data)
                                  for data in chunks),
                                 late_append_data())
# Using map and lambda here since pylint cannot handle list comprehension in
# default arguments. Also note that neither ' nor " are considered printable.
def escape_string(string, replacer=list(map(
        lambda i: chr(i) if chr(i).isalnum() or
                chr(i) in '!#$%&()*+,-./:;<=>?@[\\]^_`{|}~ ' else
                r"\x%2.2x" % i,
        range(256)))):
    """Encodes non-printable characters in a string using \\xXX escapes.

    Characters whose code point is not covered by C{replacer} are encoded as
    \\uXXXX (or \\UXXXXXXXX beyond the basic multilingual plane) instead of
    raising an C{IndexError}.

    @type string: str
    @type replacer: [str]
    @param replacer: lookup table mapping a code point to its replacement
    @rtype: str
    """
    def escape_char(char):
        code = ord(char)
        if code < len(replacer):
            return replacer[code]
        if code <= 0xffff:
            return r"\u%4.4x" % code
        return r"\U%8.8x" % code
    return "".join(escape_char(char) for char in string)
218 219 __all__.append("RequestLogWSGIFilter")
class RequestLogWSGIFilter(BaseWSGIFilter):
    """This filter logs all requests in the apache log file format."""

    @classmethod
    def creator(cls, log):
        """Returns a function creating L{RequestLogWSGIFilter}s on given log
        file. log has to be a file-like object.
        @type log: file-like
        """
        return lambda:cls(log)

    def __init__(self, log=sys.stdout):
        """
        @type log: file-like
        @param log: object with a C{write} method receiving one line per
                request
        """
        BaseWSGIFilter.__init__(self)
        assert hasattr(log, "write")
        self.log = log
        self.remote = "?"
        self.user = "-"
        # %H:%M:%S is the portable spelling of %T.
        self.time = time.strftime("%d/%b/%Y:%H:%M:%S %z")
        self.reqmethod = ""
        self.path = ""
        self.proto = None
        self.status = ""
        self.length = 0
        self.referrer = None
        self.useragent = None

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Records the request properties that end up in the log line.
        @type environ: {str: str}
        @rtype: {str: str}
        """
        assert isinstance(environ, dict)
        self.remote = environ.get("REMOTE_ADDR", self.remote)
        self.user = environ.get("REMOTE_USER", self.user)
        self.reqmethod = environ["REQUEST_METHOD"]
        # SCRIPT_NAME and PATH_INFO may be left out of the environment when
        # they are empty, so do not insist on their presence.
        self.path = environ.get("SCRIPT_NAME", "") + \
                environ.get("PATH_INFO", "")
        if environ.get("QUERY_STRING"):
            self.path = "%s?%s" % (self.path, environ["QUERY_STRING"])
        self.proto = environ.get("SERVER_PROTOCOL", self.proto)
        self.referrer = environ.get("HTTP_REFERER", self.referrer)
        self.useragent = environ.get("HTTP_USER_AGENT", self.useragent)
        return environ

    def filter_status(self, status):
        """BaseWSGIFilter interface

        Remembers the status code (first word of the status string).
        @type status: str
        @rtype: str
        """
        assert isinstance(status, str)
        self.status = status.split()[0]
        return status

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Adds the size of each piece of body data to the logged length.
        @type data: str
        @rtype: str
        """
        self.length += len(data)
        return data

    def handle_close(self):
        """BaseWSGIFilter interface

        Writes the log line for the finished request to the log file.
        """
        request = "%s %s" % (escape_string(self.reqmethod),
                             escape_string(self.path))
        if self.proto is not None:
            # The protocol comes from the request line as well, so it is
            # escaped like method and path.
            request = "%s %s" % (request, escape_string(self.proto))
        fields = ['%s %s - [%s]' % (self.remote, self.user, self.time),
                  '"%s"' % request,
                  "%s" % self.status,
                  "%d" % self.length]
        for optional in (self.referrer, self.useragent):
            if optional is not None:
                fields.append('"%s"' % escape_string(optional))
            else:
                fields.append("-")
        self.log.write("%s\n" % " ".join(fields))
294 295 __all__.append("TimerWSGIFilter")
class TimerWSGIFilter(BaseWSGIFilter):
    """Replaces a marker string in the data returned from the filtered wsgi
    application with the time the request took. The marker has to be exactly
    eight bytes long, defaults to C{"?GenTime"} and must be an element of its
    own in the iterable returned by the filtered application. If the
    application returns something like
    C{["spam?GenTime", "?GenTime spam", "?GenTime"]}, only the last occurrence
    gets replaced."""

    @classmethod
    def creator(cls, pattern):
        """Returns a function creating L{TimerWSGIFilter}s with a given pattern
        being a string of exactly eight bytes.
        @type pattern: str
        """
        return lambda:cls(pattern)

    def __init__(self, pattern="?GenTime"):
        """
        @type pattern: str
        @param pattern: the marker to be replaced with the elapsed time
        """
        BaseWSGIFilter.__init__(self)
        self.pattern = pattern
        # Reference point for the duration reported by filter_data.
        self.start = time.time()

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Passes everything through unchanged except for data equal to the
        pattern, which is replaced with the formatted elapsed time.
        @type data: str
        @rtype: str
        """
        if data != self.pattern:
            return data
        elapsed = time.time() - self.start
        return "%8.3g" % elapsed
325 326 __all__.append("EncodeWSGIFilter")
class EncodeWSGIFilter(BaseWSGIFilter):
    """Encodes all body data (no headers) with given charset.
    @note: This violates the wsgi standard as it requires unicode objects
        whereas wsgi mandates the use of str.
    """

    @classmethod
    def creator(cls, charset):
        """Returns a function creating L{EncodeWSGIFilter}s with a given
        charset.
        @type charset: str
        """
        return lambda:cls(charset)

    def __init__(self, charset="utf-8"):
        """
        @type charset: str
        @param charset: name of the encoding applied to the body data
        """
        BaseWSGIFilter.__init__(self)
        self.charset = charset

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Encodes the data with the configured charset.
        @type data: str
        @rtype: str
        """
        encoded = data.encode(self.charset)
        return encoded

    def filter_header(self, header, value):
        """BaseWSGIFilter interface

        Appends a charset parameter to the content-type header; every other
        header is passed through unchanged.
        @type header: str
        @type value: str
        @rtype: (str, str)
        """
        if header.lower() == "content-type":
            value = "%s; charset=%s" % (value, self.charset)
        return (header, value)
360 361 __all__.append("GzipWSGIFilter")
class GzipWSGIFilter(BaseWSGIFilter):
    """Compresses content using gzip."""

    @classmethod
    def creator(cls, flush=True):
        """
        Returns a function creating L{GzipWSGIFilter}s.
        @type flush: bool
        @param flush: whether or not the filter should always flush the buffer
        """
        return lambda:cls(flush)

    def __init__(self, flush=True):
        """
        @type flush: bool
        @param flush: when true the compressor is flushed after every piece of
                data, so that output is passed on immediately; when false
                data may be accumulated to get a better compression ratio
        """
        BaseWSGIFilter.__init__(self)
        self.flush = flush
        self.compress = False
        self.sio = None
        self.gzip = None

    @staticmethod
    def _accepts_gzip(header):
        """Tells whether an Accept-Encoding header value permits gzip.
        @type header: str
        @rtype: bool
        """
        for item in header.split(","):
            parts = [part.strip() for part in item.split(";")]
            if parts[0].lower() != "gzip":
                continue
            for param in parts[1:]:
                name, _, value = param.partition("=")
                if name.strip().lower() == "q":
                    try:
                        # A quality value of zero means "not acceptable".
                        return float(value) > 0
                    except ValueError:
                        return False
            return True
        return False

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Enables compression if the client announced gzip support.
        @type environ: {str: str}
        """
        assert isinstance(environ, dict)
        if "HTTP_ACCEPT_ENCODING" in environ:
            if self._accepts_gzip(environ["HTTP_ACCEPT_ENCODING"]):
                self.compress = True
                # gzip emits binary data, so a bytes buffer is needed; the
                # legacy StringIO modules only offer StringIO for that.
                buffer_type = getattr(io, "BytesIO", io.StringIO)
                self.sio = buffer_type()
                self.gzip = gzip.GzipFile(fileobj=self.sio, mode="w")
        return environ

    def filter_header(self, headername, headervalue):
        """ BaseWSGIFilter interface

        Drops the content-length header when compressing, because the length
        of the compressed body differs from the announced one.
        @type headername: str
        @type headervalue: str
        @rtype: (str, str) or None
        """
        if self.compress:
            if headername.lower() == "content-length":
                return None
        return (headername, headervalue)

    def filter_headers(self, headers):
        """BaseWSGIFilter interface

        Announces the gzip content encoding when compressing.
        @type headers: [(str, str)]
        @rtype: [(str, str)]
        """
        assert isinstance(headers, list)
        if self.compress:
            headers.append(("Content-encoding", "gzip"))
        return headers

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Feeds the data to the compressor and returns whatever compressed
        output is available so far.
        @type data: str
        @rtype: str
        """
        if not self.compress:
            return data
        self.gzip.write(data)
        if self.flush:
            self.gzip.flush()
        data = self.sio.getvalue()
        # Rewind before truncating: truncating alone keeps the old write
        # position and the next write would be padded with zero bytes.
        self.sio.seek(0)
        self.sio.truncate()
        return data

    def append_data(self):
        """BaseWSGIFilter interface

        Finishes the gzip stream and returns the remaining output.
        @rtype: [str]
        """
        if not self.compress:
            return []
        self.gzip.close()
        data = self.sio.getvalue()
        return [data]
437
class ReusableWSGIInputFilter(BaseWSGIFilter):
    """Make C{environ["wsgi.input"]} readable multiple times. Although this is
    not required by the standard it is sometimes desirable to read C{wsgi.input}
    multiple times. This filter will therefore replace that variable with an
    in-memory buffer which provides a C{seek} method.
    """

    @classmethod
    def creator(cls, maxrequestsize):
        """
        Returns a function creating L{ReusableWSGIInputFilter}s with desired
        maxrequestsize being set. If there is more data than maxrequestsize is
        available in C{wsgi.input} the rest will be ignored. (It is up to the
        adapter to eat this data.)
        @type maxrequestsize: int
        @param maxrequestsize: is the maximum number of bytes to store in the
            buffer
        """
        return lambda:cls(maxrequestsize)

    def __init__(self, maxrequestsize=65536):
        """ReusableWSGIInputFilters constructor.
        @type maxrequestsize: int
        @param maxrequestsize: is the maximum number of bytes to store in the
            buffer, see L{creator}
        """
        BaseWSGIFilter.__init__(self)
        self.maxrequestsize = maxrequestsize

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Replaces C{environ["wsgi.input"]} with an in-memory buffer holding at
        most C{maxrequestsize} bytes read from it. The original input is kept
        in C{environ["wsgitools.oldinput"]}.
        @type environ: {str: str}
        """
        stream = environ["wsgi.input"]
        # The request body is binary data, so prefer a bytes buffer; the
        # legacy StringIO modules only offer StringIO for that.
        buffer_type = getattr(io, "BytesIO", io.StringIO)
        try:
            if isinstance(stream, (buffer_type, io.StringIO)):
                return environ # nothing to be done
        except TypeError:
            # A legacy StringIO factory is a function rather than a class and
            # cannot be used with isinstance; wrap the input in that case.
            pass

        # XXX: is this really a good idea? use with care
        environ["wsgitools.oldinput"] = stream
        environ["wsgi.input"] = buffer_type(stream.read(self.maxrequestsize))

        return environ
479