Source code for ZEO.ClientStorage

# Copyright (c) 2001, 2002, 2003 Zope Foundation and Contributors.
# All Rights Reserved.
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
"""The ClientStorage class and the exceptions that it may raise.

Public contents of this module:

ClientStorage -- the main class, implementing the Storage API
"""
import logging
import os
import re
import socket
import stat
import sys
import threading
import time
import weakref
from binascii import hexlify

import BTrees.OOBTree

import zc.lockfile
import ZODB
import ZODB.BaseStorage
import ZODB.ConflictResolution
import ZODB.interfaces
import zope.interface
import six

from persistent.TimeStamp import TimeStamp
from ZEO._compat import get_ident
from ZEO._compat import WIN
from ZEO.Exceptions import ClientDisconnected
from ZEO.TransactionBuffer import TransactionBuffer
from ZODB import POSException
from ZODB import utils

import ZEO.asyncio.client
import ZEO.cache

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

def tid2time(tid):
    """Return the string form of the ``TimeStamp`` built from *tid*."""
    stamp = TimeStamp(tid)
    return str(stamp)

def get_timestamp(prev_ts=None):
    """Internal helper to return a unique TimeStamp instance.

    If the optional argument is not None, it must be a TimeStamp; the
    return value is then guaranteed to be at least 1 microsecond later
    than the argument.
    """
    now = time.time()
    # The first five fields (year .. minute) come from the UTC broken-down
    # time; the seconds field is ``now % 60`` so it keeps its fraction.
    stamp = TimeStamp(*time.gmtime(now)[:5] + (now % 60,))
    if prev_ts is not None:
        stamp = stamp.laterThan(prev_ts)
    return stamp

# One megabyte in bytes; used for the default ``cache_size`` of ClientStorage.
MB = 1024**2

@zope.interface.implementer(ZODB.interfaces.IMultiCommitStorage)
class ClientStorage(ZODB.ConflictResolution.ConflictResolvingStorage):
    """A storage class that is a network client to a remote storage.

    This is a faithful implementation of the Storage API.

    This class is thread-safe; transactions are serialized in
    tpc_begin().
    """

    def __init__(self, addr, storage='1', cache_size=20 * MB,
                 name='', wait_timeout=None,
                 disconnect_poll=None,
                 read_only=0, read_only_fallback=0,
                 blob_dir=None, shared_blob_dir=False,
                 blob_cache_size=None, blob_cache_size_check=10,
                 client_label=None,
                 cache=None,
                 ssl=None, ssl_server_hostname=None,
                 # Mostly ignored backward-compatibility options
                 client=None, var=None,
                 min_disconnect_poll=1, max_disconnect_poll=None,
                 wait=True,
                 drop_cache_rather_verify=True,
                 credentials=None,
                 server_sync=False,
                 # The ZODB-defined ZConfig support may pass these:
                 username=None, password=None, realm=None,
                 # For tests:
                 _client_factory=ZEO.asyncio.client.ClientThread,
                 ):
        """ClientStorage constructor.

        This is typically invoked from a file.

        All arguments except addr should be keyword arguments.
        Arguments:

        addr
            The server address(es).  This is either a list of
            addresses or a single address.  Each address can be a
            (hostname, port) tuple to signify a TCP/IP connection or
            a pathname string to signify a Unix domain socket
            connection.  A hostname may be a DNS name or a dotted IP
            address.  Required.

            All addresses are assumed to serve (essentially) the same
            (potentially replicated) storage.  A connection tries to
            connect to those addresses; the first successful connection
            establishment with the called for ("read_only" or
            "writable") capabilities is selected and used for storage
            interaction until the connection is lost.  In that case,
            a reconnection is tried.

            If ``ClientStorage`` calls for the "writable" capability
            but allows for a "read only" fallback, a read only
            connection can be used as a fallback; if a writable
            connection becomes available later, a switch to this
            connection is performed.

        storage
            The server storage name, defaulting to '1'.  The name must
            match one of the storage names supported by the server(s)
            specified by the addr argument.

        cache_size
            The disk cache size, defaulting to 20 megabytes.
            This is passed to the ClientCache constructor.

        name
            The storage name, defaulting to a combination of the
            address and the server storage name.  This is used to
            construct the response to getName()

        wait_timeout
            Maximum time (seconds) to wait for connections,
            defaulting to 30.
            Note: the timeout applies only to [re]connect.
            Normal operations can take arbitrarily long.  This is
            important for long running operations, such as ``pack``.

        read_only
            A flag indicating whether this should be a read-only
            storage, defaulting to false (i.e. writing is allowed by
            default).

        read_only_fallback
            A flag indicating whether a read-only remote storage
            should be acceptable as a fallback when no writable
            storages are available.  Defaults to false.  At most one
            of read_only and read_only_fallback should be true.

        blob_dir
            directory path for blob data.  'blob data' is data that
            is retrieved via the loadBlob API.

        shared_blob_dir
            Flag whether the blob_dir is a server-shared filesystem
            that should be used instead of transferring blob data over
            the ZEO protocol.

        blob_cache_size
            Maximum size of the ZEO blob cache, in bytes.  If not set,
            then the cache size isn't checked and the blob directory
            will grow without bound.

            This option is ignored if shared_blob_dir is true.

        blob_cache_size_check
            Cache check size as percent of blob_cache_size.  The ZEO
            cache size will be checked when this many bytes have been
            loaded into the cache.  Defaults to 10% of the blob cache
            size.  This option is ignored if shared_blob_dir is true.

        client_label
            A label to include in server log messages for the client.

        cache
            A cache object or a file path (relative or absolute).
            Defaults to None, in which case the cache is determined
            from client and var.

        ssl
            An ssl client context (i.e. with purpose "ServerAuth")
            to call for SSL connections.

        ssl_server_hostname
            The server hostname - used during the SSL authentication
            check

        client
        var
            If cache is None, client determines the cache:
            if it is None, then a non persistent cache is used;
            otherwise, client is used together with var (defaults to
            the current working directory) to construct the file path
            for the persistent cache file

        wait
            Wait for server connection, defaulting to true.

        credentials
            [Experimental] Credentials object for authentication to
            server.

        server_sync
            Whether sync() should make a server round trip, thus
            causing client to wait for outstanding invalidations.

            The `sync` is called in `transaction.begin`.  A server
            round trip at this place guarantees that the transaction
            takes notice of all prior modifications.  This may be
            important when several client processes share the same
            ZODB as the following examples demonstrate.

            Example 1: Assume that a user issues requests req1 and
            then req2 where req1 modifies data read by req2.  If req1
            and req2 are processed by different Zope processes, then
            the transaction started for req2 processing may see a ZODB
            state which does not yet include the req1 modifications.
            A server round trip avoids this.

            Example 2: A similar situation arises when 2 Zope
            processes communicate via non ZODB means to inform the
            other about a state change.  If this communication happens
            to be faster than the ZODB internal state change
            propagation then the target process again risks to not yet
            see the changed state.  A server round trip, again, avoids
            this.

            Defaults to false.

        username
        password
        realm
        disconnect_poll
        min_disconnect_poll
        max_disconnect_poll
        drop_cache_rather_verify
            ignored; retained (as parameters) for compatibility
        """
        if isinstance(addr, int):
            addr = ('', addr)

        # The name is derived before a single address is wrapped in a list.
        self.__name__ = name or str(addr)  # Standard convention for storages

        if isinstance(addr, six.string_types):
            if WIN:
                raise ValueError("Unix sockets are not available on Windows")
            addr = [addr]
        elif (isinstance(addr, tuple) and len(addr) == 2 and
              isinstance(addr[0], six.string_types) and
              isinstance(addr[1], int)):
            addr = [addr]

        logger.info(
            "%s %s (pid=%d) created %s/%s for storage: %r",
            self.__name__,
            self.__class__.__name__,
            os.getpid(),
            "RO" if read_only else "RW",
            "fallback" if read_only_fallback else "normal",
            storage,
            )

        self._is_read_only = read_only
        self._read_only_fallback = read_only_fallback

        self._addr = addr  # For tests

        self._iterators = weakref.WeakValueDictionary()
        self._iterator_ids = set()
        self._storage = storage

        # _server_addr is used by sortKey()
        self._server_addr = None

        self._client_label = client_label

        self._info = {'length': 0, 'size': 0, 'name': 'ZEO Client',
                      'supportsUndo': 0, 'interfaces': ()}

        self._db = None

        self._oids = []  # List of pre-fetched oids from server

        cache = self._cache = open_cache(
            cache, var, client, storage, cache_size)

        # XXX need to check for POSIX-ness here
        self.blob_dir = blob_dir
        self.shared_blob_dir = shared_blob_dir

        if blob_dir is not None:
            # Avoid doing this import unless we need it, as it
            # currently requires pywin32 on Windows.
            import ZODB.blob
            if shared_blob_dir:
                self.fshelper = ZODB.blob.FilesystemHelper(blob_dir)
            else:
                if 'zeocache' not in ZODB.blob.LAYOUTS:
                    ZODB.blob.LAYOUTS['zeocache'] = BlobCacheLayout()
                self.fshelper = ZODB.blob.FilesystemHelper(
                    blob_dir, layout_name='zeocache')
            # NOTE(review): the flattened source does not show whether
            # create() applies to both layouts or only to 'zeocache' --
            # confirm against the upstream file.
            self.fshelper.create()
        else:
            self.fshelper = None

        self._blob_cache_size = blob_cache_size
        self._blob_data_bytes_loaded = 0
        if blob_cache_size is not None:
            assert blob_cache_size_check < 100
            # Convert the percentage into an absolute number of bytes.
            self._blob_cache_size_check = (
                blob_cache_size * blob_cache_size_check // 100)
            self._check_blob_size()

        self.server_sync = server_sync

        self._server = _client_factory(
            addr, self, cache, storage,
            ZEO.asyncio.client.Fallback if read_only_fallback else read_only,
            wait_timeout or 30,
            ssl=ssl, ssl_server_hostname=ssl_server_hostname,
            credentials=credentials,
            )
        self._call = self._server.call
        self._async = self._server.async_
        self._async_iter = self._server.async_iter
        self._wait = self._server.wait

        self._commit_lock = threading.Lock()

        if wait:
            try:
                self._wait()
            except Exception:
                # No point in keeping the server going if the storage
                # creation fails
                self._server.close()
                raise

    def new_addr(self, addr):
        """Remember *addr* and hand its normalized form to the client."""
        self._addr = addr
        self._server.new_addrs(self._normalize_addr(addr))

    def _normalize_addr(self, addr):
        """Return *addr* wrapped in a list when it is a single address.

        A bare port number becomes ``('', port)``; a single string or a
        single ``(host, port)`` tuple is wrapped in a one-element list.
        Anything else is returned unchanged.
        """
        if isinstance(addr, int):
            addr = ('', addr)
        if isinstance(addr, six.string_types):
            addr = [addr]
        elif (isinstance(addr, tuple) and len(addr) == 2 and
              isinstance(addr[0], six.string_types) and
              isinstance(addr[1], int)):
            addr = [addr]
        return addr
def close(self):
    "Storage API: finalize the storage, releasing external resources."
    self._server.close()

    # Wait for a running blob-cache size check, if one was started.
    if self._check_blob_size_thread is not None:
        self._check_blob_size_thread.join()

    self.ping = self.sync = None  # break reference cycles
# Most recently started blob-cache size-check thread, if any.
_check_blob_size_thread = None

def _check_blob_size(self, bytes=None):
    """Start a background blob-cache size check when one is due.

    Nothing happens when no blob cache size is configured, when the blob
    directory is shared or unset, or when *bytes* is given and is still
    below the check threshold.  Otherwise the loaded-bytes counter is
    reset and a daemon thread running ``_check_blob_cache_size`` is
    started and remembered in ``_check_blob_size_thread``.
    """
    if self._blob_cache_size is None:
        return
    if self.shared_blob_dir or not self.blob_dir:
        return

    if (bytes is not None) and (bytes < self._blob_cache_size_check):
        return

    self._blob_data_bytes_loaded = 0

    target = max(self._blob_cache_size - self._blob_cache_size_check, 0)

    check_blob_size_thread = threading.Thread(
        target=_check_blob_cache_size,
        args=(self.blob_dir, target),
        name="%s zeo client check blob size thread" % self.__name__,
        )
    # Thread.setDaemon() is deprecated; set the attribute instead.
    check_blob_size_thread.daemon = True
    check_blob_size_thread.start()
    self._check_blob_size_thread = check_blob_size_thread
def registerDB(self, db):
    """Storage API: register a database for invalidation messages.

    This is called by ZODB.DB (and by some tests).

    The storage isn't really ready to use until after this call.
    """
    base = super(ClientStorage, self)
    base.registerDB(db)
    # Remembered so that invalidations can be forwarded to the database.
    self._db = db
def is_connected(self, test=False):
    """Return whether the storage is currently connected to a server.

    The *test* argument is accepted but not used.
    """
    server = self._server
    return server.is_connected()
def sync(self):
    """Do nothing; the separate async thread should keep us up to date."""
    return None

# Incremented on every connect and every disconnect.
_connection_generation = 0
def notify_connected(self, conn, info):
    """The connection is about to be established via *conn*.

    *conn* is an ``asyncio.client.ClientIO`` instance.
    We can use it already for asynchronous server calls but
    must not make synchronous calls or use the normal server call API
    (would lead to deadlock or ``ClientDisconnected``).

    *info* is a ``dict`` providing information about the server
    (and its associated storage).
    """
    self.set_server_addr(conn.get_peername())
    self.protocol_version = conn.protocol_version
    self._is_read_only = conn.is_read_only()

    # invalidate our db cache
    if self._db is not None:
        self._db.invalidateCache()

    logger.info("%s %s to storage: %s",
                self.__name__,
                'Reconnected' if self._connection_generation
                else 'Connected',
                self._server_addr)

    self._connection_generation += 1

    if self._client_label:
        # Note: we cannot yet use ``_async`` (connection not yet
        # officially established) but can already use
        # ``ClientIO.call_async``
        conn.call_async('set_client_label', (self._client_label,))

    self._info.update(info)

    # Advertise each optional interface the server reports as supported.
    supported = self._info.get('interfaces', ())
    for iface in (
            ZODB.interfaces.IStorageRestoreable,
            ZODB.interfaces.IStorageIteration,
            ZODB.interfaces.IStorageUndoable,
            ZODB.interfaces.IStorageCurrentRecordIteration,
            ZODB.interfaces.IBlobStorage,
            ZODB.interfaces.IExternalGC):
        if (iface.__module__, iface.__name__) in supported:
            zope.interface.alsoProvides(self, iface)

    # Servers older than protocol 5 get a 'lastTransaction' round trip
    # in place of 'ping'.
    if self.protocol_version[1:] >= b'5':
        self.ping = lambda: self._call('ping')
    else:
        self.ping = lambda: self._call('lastTransaction')

    if self.server_sync:
        self.sync = self.ping
def set_server_addr(self, addr):
    """Normalize the server address and store it as a string."""
    if isinstance(addr, str):
        self._server_addr = addr
        return

    assert isinstance(addr, tuple)
    # If the server is on a remote host, we need to guarantee
    # that all clients used the same name for the server.  If
    # they don't, the sortKey() may be different for each client.
    # The best solution seems to be the official name reported
    # by gethostbyaddr().
    host = addr[0]
    try:
        canonical = socket.gethostbyaddr(host)[0]
    except socket.error as err:
        logger.debug("%s Error resolving host: %s (%s)",
                     self.__name__, host, err)
        canonical = host
    self._server_addr = str((canonical, addr[1]))

def sortKey(self):
    """Return ``'<storage>:<server address>'``.

    Raises ClientDisconnected while no server address is known.
    """
    # XXX sortKey should be explicit, possibly based on database name.
    # If the client isn't connected to anything, it can't have a
    # valid sortKey().  Raise an error to stop the transaction early.
    if self._server_addr is None:
        raise ClientDisconnected
    return '%s:%s' % (self._storage, self._server_addr)
def notify_disconnected(self):
    """Internal: notify that the server connection was terminated.

    This is called by ConnectionManager when the connection is
    closed or when certain problems with the connection occur.
    """
    logger.info("%s Disconnected from storage: %r",
                self.__name__, self._server_addr)
    # Forget all iterators; see _iterator_gc(disconnected=True).
    self._iterator_gc(True)
    # Transactions begun under the old generation are rejected by
    # _check_trans once this counter has changed.
    self._connection_generation += 1
    self._is_read_only = self._server.is_read_only()
def __len__(self):
    """Return the size of the storage."""
    # TODO: Is this method used?
    info = self._info
    return info['length']
def getName(self):
    """Storage API: return the storage name as a string.

    The return value consists of two parts: the name as determined
    by the name and addr arguments to the ClientStorage
    constructor, and the string 'connected' or 'disconnected' in
    parentheses indicating whether the storage is (currently)
    connected.
    """
    if self.is_connected():
        state = "connected"
    else:
        state = "disconnected"
    return "%s (%s)" % (self.__name__, state)
def getSize(self):
    """Storage API: an approximate size of the database, in bytes."""
    info = self._info
    return info['size']
def supportsUndo(self):
    """Storage API: return whether we support undo."""
    info = self._info
    return info['supportsUndo']
def is_read_only(self):
    """Storage API: return whether we are in read-only mode."""
    local_flag = self._is_read_only
    if local_flag:
        return local_flag
    # Not read-only locally: defer to what the server connection reports.
    return self._server.is_read_only()
# Alternate spelling of is_read_only.
isReadOnly = is_read_only

def _check_trans(self, trans, meth):
    """Internal helper to check a transaction argument for sanity.

    Return the buffer registered for *trans* on this storage.

    Raises ReadOnlyError when the storage is read-only,
    StorageTransactionError when *trans* holds no buffer for this
    storage, and ClientDisconnected when the buffer was created under
    a different connection generation.  *meth* is the calling method's
    name and is passed along in the raised errors.
    """
    if self._is_read_only:
        raise POSException.ReadOnlyError()

    try:
        buf = trans.data(self)
    except KeyError:
        buf = None

    if buf is None:
        raise POSException.StorageTransactionError(
            "Transaction not committing", meth, trans)

    if buf.connection_generation != self._connection_generation:
        # We were disconnected, so this one is poisoned
        raise ClientDisconnected(meth, 'on a disconnected transaction')

    return buf
def history(self, oid, size=1):
    """Storage API: return a sequence of HistoryEntry objects."""
    entries = self._call('history', oid, size)
    return entries
def record_iternext(self, next=None):
    """Storage API: get the next database record.

    This is part of the conversion-support API.
    """
    record = self._call('record_iternext', next)
    return record
def getTid(self, oid):
    """Ask the server for the tid of *oid*."""
    # XXX deprecated: but ZODB tests use this.  They shouldn't
    tid = self._call('getTid', oid)
    return tid
def loadSerial(self, oid, serial):
    """Storage API: load a historical revision of an object."""
    revision = self._call('loadSerial', oid, serial)
    return revision
def load(self, oid, version=''):
    """Return the first two items of the newest known record for *oid*.

    Raises POSKeyError when no record is found.  *version* is not used.
    """
    # Note: cache correctness is guaranteed only up to
    # ``_cache.getLastTid()``. The following call therefore
    # may return outdated information.
    record = self.loadBefore(oid, utils.maxtid)
    if record is not None:
        return record[:2]
    raise POSException.POSKeyError(oid)

def loadBefore(self, oid, tid):
    """Return the record for *oid* before *tid*, cache first."""
    # Note: cache correctness is guaranteed only for
    # ``tid <= _cache.getLastTid()``.
    # For larger *tid*, the cache may contain outdated information.
    cached = self._cache.loadBefore(oid, tid)
    if not cached:
        # Cache miss: ask the server.
        return self._server.load_before(oid, tid)
    return cached

def prefetch(self, oids, tid):
    """Forward a prefetch request for *oids* before *tid* to the server."""
    server = self._server
    server.prefetch(oids, tid)
def new_oid(self):
    """Storage API: return a new object identifier."""
    if self._is_read_only:
        raise POSException.ReadOnlyError()

    while True:
        try:
            return self._oids.pop()
        except IndexError:
            pass
        # The local pool is empty: fetch another batch.  It is stored
        # reversed so that pop() hands oids out in the server's order.
        self._oids[:0] = reversed(self._call('new_oids'))
def pack(self, t=None, referencesf=None, wait=1, days=0):
    """Storage API: pack the storage.

    Deviations from the Storage API: the referencesf argument is
    ignored; two additional optional arguments wait and days are
    provided:

    wait -- a flag indicating whether to wait for the pack to
        complete; defaults to true.

    days -- a number of days to subtract from the pack time;
        defaults to zero.
    """
    # TODO: Is it okay that read-only connections allow pack()?
    # rf argument ignored; server will provide its own implementation
    pack_time = time.time() if t is None else t
    seconds_back = days * 86400
    pack_time = pack_time - seconds_back
    return self._call('pack', pack_time, wait)
def store(self, oid, serial, data, version, txn):
    """Storage API: store data for an object.

    The record is sent to the server asynchronously and also recorded in
    the transaction's buffer.  *version* must be false.
    """
    assert not version

    tbuf = self._check_trans(txn, 'store')
    self._async('storea', oid, serial, data, id(txn))
    tbuf.store(oid, data)
def checkCurrentSerialInTransaction(self, oid, serial, transaction):
    """Send an asynchronous current-serial check for *oid* to the server."""
    method = 'checkCurrentSerialInTransaction'
    self._check_trans(transaction, method)
    self._async(method, oid, serial, id(transaction))
def storeBlob(self, oid, serial, data, blobfilename, version, txn):
    """Storage API: store a blob object.

    The blob file is moved (or copied) into the blob directory right
    away.  With a shared blob directory only its name is sent to the
    server; otherwise its content is sent in chunks.  Returns what
    ``store()`` returns for the object record.
    """
    assert not version
    tbuf = self._check_trans(txn, 'storeBlob')

    # Grab the file right away. That way, if we don't have enough
    # room for a copy, we'll know now rather than in tpc_finish.
    # Also, this relieves the client of having to manage the file
    # (or the directory containing it).
    self.fshelper.getPathForOID(oid, create=True)
    fd, target = self.fshelper.blob_mkstemp(oid, serial)
    os.close(fd)

    # It's a bit odd (and impossible on windows) to rename over
    # an existing file.  We'll use the temporary file name as a base.
    target += '-'
    ZODB.blob.rename_or_copy_blob(blobfilename, target)
    os.remove(target[:-1])

    serials = self.store(oid, serial, data, '', txn)
    if self.shared_blob_dir:
        self._async(
            'storeBlobShared',
            oid, serial, data, os.path.basename(target), id(txn))
    else:
        # Store a blob to the server.  We don't want to read all of
        # the data into memory, so we use a message iterator.  This
        # allows us to read the blob data as needed.
        def blob_messages():
            yield ('storeBlobStart', ())
            # ``with`` closes the file even if the iterator is abandoned
            # before it is exhausted.
            with open(target, 'rb') as blob_file:
                while True:
                    chunk = blob_file.read(59000)
                    if not chunk:
                        break
                    yield ('storeBlobChunk', (chunk, ))
            yield ('storeBlobEnd', (oid, serial, data, id(txn)))

        self._async_iter(blob_messages())
        # NOTE(review): the flattened source does not show whether this
        # call belongs to the non-shared branch only -- confirm upstream.
        tbuf.storeBlob(oid, target)

    return serials
def receiveBlobStart(self, oid, serial):
    """Create the empty ``.dl`` download file for an incoming blob.

    Asserts that neither the blob nor its download file exists yet and
    that the ``.lock`` file in the blob's directory does.
    """
    logger.debug("receiveBlobStart for %r, %r", oid, serial)
    blob_filename = self.fshelper.getBlobFilename(oid, serial)
    assert not os.path.exists(blob_filename)
    lockfilename = os.path.join(os.path.dirname(blob_filename), '.lock')
    assert os.path.exists(lockfilename)
    blob_filename += '.dl'
    assert not os.path.exists(blob_filename)
    with open(blob_filename, 'wb'):
        pass

def receiveBlobChunk(self, oid, serial, chunk):
    """Append *chunk* to the blob's download file and count its bytes."""
    logger.debug("receiveBlobChunk for %r, %r", oid, serial)
    blob_filename = self.fshelper.getBlobFilename(oid, serial)+'.dl'
    assert os.path.exists(blob_filename)
    with open(blob_filename, 'r+b') as f:
        f.seek(0, 2)  # position at the end of the file
        f.write(chunk)
    self._blob_data_bytes_loaded += len(chunk)
    self._check_blob_size(self._blob_data_bytes_loaded)

def receiveBlobStop(self, oid, serial):
    """Rename the finished download to its final name, owner-readable."""
    logger.debug("receiveBlobStop for %r, %r", oid, serial)
    blob_filename = self.fshelper.getBlobFilename(oid, serial)
    os.rename(blob_filename+'.dl', blob_filename)
    os.chmod(blob_filename, stat.S_IREAD)

def deleteObject(self, oid, serial, txn):
    """Send an asynchronous delete of *oid* and buffer it as ``None``."""
    tbuf = self._check_trans(txn, 'deleteObject')
    self._async('deleteObject', oid, serial, id(txn))
    tbuf.store(oid, None)

def loadBlob(self, oid, serial):
    """Return the file name of the committed blob *oid*/*serial*.

    Raises Unsupported when no blob directory is configured and
    POSKeyError when the blob file cannot be found or fetched.
    """
    # Load a blob.  If it isn't present and we have a shared blob
    # directory, then assume that it doesn't exist on the server
    # and raise POSKeyError.
    if self.fshelper is None:
        raise POSException.Unsupported("No blob cache directory is "
                                       "configured.")

    blob_filename = self.fshelper.getBlobFilename(oid, serial)
    if self.shared_blob_dir:
        if os.path.exists(blob_filename):
            return blob_filename
        else:
            # We're using a server shared cache.  If the file isn't
            # here, it's not anywhere.
            raise POSException.POSKeyError(
                "No blob file at %s" % blob_filename, oid, serial)

    if os.path.exists(blob_filename):
        return _accessed(blob_filename)

    # First, we'll create the directory for this oid, if it doesn't exist.
    self.fshelper.createPathForOID(oid)

    # OK, it's not here and we (or someone) needs to get it.  We
    # want to avoid getting it multiple times.  We want to avoid
    # getting it multiple times even across separate client
    # processes on the same machine. We'll use file locking.

    lock = _lock_blob(blob_filename)
    try:
        # We got the lock, so it's our job to download it.  First,
        # we'll double check that someone didn't download it while we
        # were getting the lock:

        if os.path.exists(blob_filename):
            return _accessed(blob_filename)

        # Ask the server to send it to us.  When this function
        # returns, it will have been sent. (The receiving will
        # have been handled by the IO thread.)
        self._call('sendBlob', oid, serial)

        if os.path.exists(blob_filename):
            return _accessed(blob_filename)

        raise POSException.POSKeyError("No blob file", oid, serial)

    finally:
        lock.close()

def openCommittedBlobFile(self, oid, serial, blob=None):
    """Open the committed blob *oid*/*serial* for reading.

    Returns a plain binary file when *blob* is None, otherwise a
    ``ZODB.blob.BlobFile`` tied to *blob*.
    """
    blob_filename = self.loadBlob(oid, serial)
    try:
        if blob is None:
            return open(blob_filename, 'rb')
        else:
            return ZODB.blob.BlobFile(blob_filename, 'r', blob)
    except (IOError):
        # The file got removed while we were opening.
        # Fall through and try again with the protection of the lock.
        pass

    lock = _lock_blob(blob_filename)
    try:
        blob_filename = self.fshelper.getBlobFilename(oid, serial)
        if not os.path.exists(blob_filename):
            if self.shared_blob_dir:
                # We're using a server shared cache.  If the file isn't
                # here, it's not anywhere.
                raise POSException.POSKeyError("No blob file", oid, serial)
            self._call('sendBlob', oid, serial)
            if not os.path.exists(blob_filename):
                raise POSException.POSKeyError("No blob file", oid, serial)

        _accessed(blob_filename)
        if blob is None:
            return open(blob_filename, 'rb')
        else:
            return ZODB.blob.BlobFile(blob_filename, 'r', blob)
    finally:
        lock.close()

def temporaryDirectory(self):
    """Return the blob helper's temporary directory."""
    return self.fshelper.temp_dir
def tpc_vote(self, txn):
    """Storage API: vote on a transaction.

    Returns the list of oids resolved by the server followed by those
    resolved by this client, or None when nothing was resolved.
    """
    tbuf = self._check_trans(txn, 'tpc_vote')
    try:
        attempts = 0
        unresolved = True
        # The attempt limit mainly avoids an infinite loop.
        while unresolved and attempts < 9:
            unresolved = False
            for item in self._call('vote', id(txn)) or ():
                if not isinstance(item, dict):
                    # A plain oid: the server resolved it itself.
                    tbuf.server_resolve(item)
                    continue
                # Conflict, let's try to resolve it
                unresolved = True
                oid = item['oid']
                committed, read = item['serials']
                data = self.tryToResolveConflict(
                    oid, committed, read, item['data'])
                self._async('storea', oid, committed, data, id(txn))
                tbuf.resolve(oid, data)
            attempts += 1

    except POSException.StorageTransactionError:
        # Hm, we got disconnected and reconnected between
        # _check_trans and voting. Let's check the transaction again:
        self._check_trans(txn, 'tpc_vote')
        raise

    except POSException.ConflictError as err:
        oid = getattr(err, 'oid', None)
        if oid is not None:
            # This is a band-aid to help recover from a situation
            # that shouldn't happen.  A Client somehow misses some
            # invalidations and has out of date data in its
            # cache. We need some way to invalidate the cache
            # entry without invalidations. So, if we see a
            # (unresolved) conflict error, we assume that the
            # cache entry is bad and invalidate it.
            self._cache.invalidate(oid, None)
        raise

    if tbuf.exception:
        raise tbuf.exception

    if not (tbuf.server_resolved or tbuf.client_resolved):
        return None
    return list(tbuf.server_resolved) + list(tbuf.client_resolved)
def tpc_transaction(self):
    """Return the value of the ``_transaction`` attribute."""
    current = self._transaction
    return current
def tpc_begin(self, txn, tid=None, status=' '):
    """Storage API: begin a transaction.

    Attaches a fresh TransactionBuffer to *txn*, takes the commit lock
    and sends ``tpc_begin`` to the server asynchronously.

    Raises ReadOnlyError on a read-only storage and
    StorageTransactionError when *txn* already has a buffer here.
    """
    if self._is_read_only:
        raise POSException.ReadOnlyError()

    try:
        tbuf = txn.data(self)
    except AttributeError:
        # Gaaaa. This is a recovery transaction. Work around this
        # until we can think of something better. XXX
        tb = {}
        txn.data = tb.__getitem__
        txn.set_data = tb.__setitem__
    except KeyError:
        pass
    else:
        if tbuf is not None:
            raise POSException.StorageTransactionError(
                "Duplicate tpc_begin calls for same transaction")

    txn.set_data(self, TransactionBuffer(self._connection_generation))

    # XXX we'd like to allow multiple transactions at a time at some point,
    # but for now, due to server limitations, TCBOO.
    self._commit_lock.acquire()

    self._tbuf = txn.data(self)

    try:
        self._async(
            'tpc_begin', id(txn),
            txn.user, txn.description, txn.extension, tid, status)
    except ClientDisconnected:
        # Undo the setup above (buffer, commit lock) before re-raising.
        self.tpc_end(txn)
        raise
def tpc_end(self, txn):
    """Close and detach *txn*'s buffer, then release the commit lock."""
    tbuf = txn.data(self)
    if tbuf is not None:
        tbuf.close()
        txn.set_data(self, None)
        # Released only together with the buffer it was taken for.
        self._commit_lock.release()

def lastTransaction(self):
    """Return the last transaction id known to the client cache."""
    return self._cache.getLastTid()
def tpc_abort(self, txn, timeout=None):
    """Storage API: abort a transaction.

    (The timeout keyword argument is for tests to wait longer than
    they normally would.)
    """
    try:
        # Only the lookup matters here: a KeyError means there is
        # nothing to abort for this transaction.
        txn.data(self)
    except KeyError:
        return

    try:
        # Caution:  Are there any exceptions that should prevent an
        # abort from occurring?  It seems wrong to swallow them
        # all, yet you want to be sure that other abort logic is
        # executed regardless.
        try:
            # It's tempting to make an asynchronous call here, but
            # it's useful for it to be synchronous because, if we
            # failed due to a disconnect, synchronous calls will
            # wait a little while in hopes of reconnecting.  If
            # we're able to reconnect and retry the transaction,
            # then it might succeed!
            self._call('tpc_abort', id(txn), timeout=timeout)
        except ClientDisconnected:
            logger.debug("%s ClientDisconnected in tpc_abort() ignored",
                         self.__name__)
    finally:
        self._iterator_gc()
        self.tpc_end(txn)
def tpc_finish(self, txn, f=lambda tid: None):
    """Storage API: finish a transaction.

    Returns the transaction id reported by the server.
    """
    tbuf = self._check_trans(txn, 'tpc_finish')
    server = self._server

    try:
        tid = server.tpc_finish(id(txn), tbuf, f)
    finally:
        # Runs whether or not the server call succeeded.
        self.tpc_end(txn)
        self._iterator_gc()

    self._update_blob_cache(tbuf, tid)
    return tid
def _update_blob_cache(self, tbuf, tid):
    """Internal helper: move blobs updated by a transaction to the cache."""
    # Not sure why _update_cache() would be called on a closed storage.
    if self._cache is None:
        return
    if self.fshelper is None:
        return

    pending = tbuf.blobs
    moved_any = False
    while pending:
        oid, source = pending.pop()
        self._blob_data_bytes_loaded += os.stat(source).st_size
        self.fshelper.getPathForOID(oid, create=True)
        destination = self.fshelper.getBlobFilename(oid, tid)
        lock = _lock_blob(destination)
        try:
            ZODB.blob.rename_or_copy_blob(source, destination)
        finally:
            lock.close()
        moved_any = True

    if moved_any:
        self._check_blob_size(self._blob_data_bytes_loaded)
def undo(self, trans_id, txn):
    """Storage API: undo a transaction.

    This is executed in a transactional context.  It has no effect
    until the transaction is committed.  It can be undone itself.

    Zope uses this to implement undo unless it is not supported by
    a storage.
    """
    self._check_trans(txn, 'undo')
    txn_id = id(txn)
    self._async('undoa', trans_id, txn_id)
def undoInfo(self, first=0, last=-20, specification=None):
    """Storage API: return undo information."""
    answer = self._call('undoInfo', first, last, specification)
    return answer
def undoLog(self, first=0, last=-20, filter=None):
    """Storage API: return a sequence of TransactionDescription objects.

    The filter argument should be None or left unspecified, since
    it is impossible to pass the filter function to the server to
    be executed there.  If filter is not None, an empty sequence
    is returned.
    """
    if filter is None:
        return self._call('undoLog', first, last)
    return []
# Recovery support
def copyTransactionsFrom(self, other, verbose=0):
    """Copy transactions from another storage.

    This is typically used for converting data from one storage to
    another.  `other` must have an .iterator() method.
    """
    copy_storage = ZODB.BaseStorage.copy
    copy_storage(other, self, verbose)
def restore(self, oid, serial, data, version, prev_txn, transaction):
    """Write data already committed in a separate database."""
    assert not version
    self._check_trans(transaction, 'restore')
    txn_id = id(transaction)
    self._async('restorea', oid, serial, data, prev_txn, txn_id)
# Below are methods invoked by the StorageServer
def info(self, dict):
    """Server callback to update the info dictionary."""
    current = self._info
    current.update(dict)
def invalidateCache(self):
    """Invalidate the registered database's cache, if one is registered."""
    db = self._db
    if db is None:
        return
    db.invalidateCache()
def invalidateTransaction(self, tid, oids):
    """Server callback: Invalidate objects modified by tid."""
    db = self._db
    if db is None:
        return
    db.invalidate(tid, oids)
# IStorageIteration
def iterator(self, start=None, stop=None):
    """Return an IStorageTransactionInformation iterator."""
    # iids are "iterator IDs" that can be used to query an iterator whose
    # status is held on the server.
    server_iid = self._call('iterator_start', start, stop)
    txn_iterator = self._setup_iterator(TransactionIterator, server_iid)
    return txn_iterator
def _setup_iterator(self, factory, iid, *args):
    """Build an iterator with *factory* and track it under *iid*."""
    created = factory(self, iid, *args)
    self._iterators[iid] = created
    self._iterator_ids.add(iid)
    return created

def _forget_iterator(self, iid):
    """Stop tracking the iterator registered under *iid*."""
    self._iterators.pop(iid, None)
    self._iterator_ids.remove(iid)

def _iterator_gc(self, disconnected=False):
    """Tell the server about iterators that are no longer referenced.

    With *disconnected* true, every tracked iterator is given the id -1
    and all bookkeeping is cleared without contacting the server.
    """
    if not self._iterator_ids:
        return

    if disconnected:
        for tracked in self._iterators.values():
            tracked._iid = -1
        self._iterators.clear()
        self._iterator_ids.clear()
        return

    # Recall that self._iterators is a WeakValueDictionary. Under
    # non-refcounted implementations like PyPy, this means that
    # unreachable iterators (and their IDs) may still be in this
    # map for some arbitrary period of time (until the next
    # garbage collection occurs.) This is fine: the server
    # supports being asked to GC the same iterator ID more than
    # once. Iterator ids can be reused, but only after a server
    # restart, after which we had already been called with
    # `disconnected` True and so had cleared out our map anyway,
    # plus we simply replace whatever is in the map if we get a
    # duplicate id---and duplicates at that point would be dead
    # objects waiting to be cleaned up. So there's never any risk
    # of confusing TransactionIterator objects that are in use.
    dead_ids = self._iterator_ids - set(self._iterators)
    # let tests know we've been called:
    self._iterators._last_gc = time.time()
    if not dead_ids:
        return

    try:
        self._async('iterator_gc', list(dead_ids))
    except ClientDisconnected:
        # If we get disconnected, all of the iterators on the
        # server are thrown away.  We should clear ours too:
        return self._iterator_gc(True)
    self._iterator_ids -= dead_ids
def server_status(self, timeout=None):
    """Retrieve status dictionary from the server.

    (The timeout keyword argument is for tests)
    """
    status = self._call('server_status', timeout=timeout)
    return status
class TransactionIterator(object):
    """Client-side handle for a transaction iterator held on the server.

    Each step asks the storage for the next transaction via the
    ``iterator_next`` call, using the iterator id given at construction.
    """

    def __init__(self, storage, iid, *args):
        # Extra positional arguments are accepted and ignored.
        self._storage = storage
        self._iid = iid
        self._ended = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next transaction record.

        Raises ``StopIteration`` once the server reports exhaustion (and
        on every later call), and ``ClientDisconnected`` when the
        iterator id is negative.
        """
        if self._ended:
            raise StopIteration()

        # A negative id marks the iterator as invalidated (the storage's
        # iterator GC assigns -1 on disconnect).
        if self._iid < 0:
            raise ClientDisconnected("Disconnected iterator")

        tx_data = self._storage._call('iterator_next', self._iid)
        if tx_data is None:
            # The iterator is exhausted, and the server has already
            # disposed it.
            self._ended = True
            self._storage._forget_iterator(self._iid)
            raise StopIteration()

        return ClientStorageTransactionInformation(
            self._storage, self, *tx_data)

    next = __next__


class ClientStorageTransactionInformation(ZODB.BaseStorage.TransactionRecord):
    """Transaction record produced by ``TransactionIterator``.

    Iterating over an instance starts a server-side record iterator for
    this transaction and yields its data records.
    """

    def __init__(self, storage, txiter, tid, status, user, description,
                 extension):
        self._storage = storage
        self._txiter = txiter
        # These two are initialised here but not read anywhere in this
        # class.
        self._completed = False
        self._riid = None

        self.tid = tid
        self.status = status
        self.user = user
        self.description = description
        self.extension = extension

    def __iter__(self):
        riid = self._storage._call('iterator_record_start',
                                   self._txiter._iid, self.tid)
        return self._storage._setup_iterator(RecordIterator, riid)


class RecordIterator(object):
    """Client-side handle for a server-held iterator over data records."""

    def __init__(self, storage, riid):
        self._riid = riid
        self._completed = False
        self._storage = storage

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next ``DataRecord``; raise ``StopIteration`` at the end."""
        if self._completed:
            # We finished iteration once already and the server can't know
            # about the iteration anymore.
            raise StopIteration()
        item = self._storage._call('iterator_record_next', self._riid)
        if item is None:
            # The iterator is exhausted, and the server has already
            # disposed it.
            self._completed = True
            raise StopIteration()
        return ZODB.BaseStorage.DataRecord(*item)

    next = __next__


class BlobCacheLayout(object):
    """Map oids and tids to relative paths inside a blob cache directory.

    The integer ``utils.u64(oid)`` is split by ``size``: the remainder
    names the sub-directory and the quotient starts the file name.
    """

    # Divisor used to spread blobs over sub-directories.
    size = 997

    def oid_to_path(self, oid):
        """Return the sub-directory name for ``oid`` as a string."""
        return str(utils.u64(oid) % self.size)

    def getBlobFilePath(self, oid, tid):
        """Return ``<remainder>/<quotient>.<hex tid><BLOB_SUFFIX>``."""
        base, rem = divmod(utils.u64(oid), self.size)
        return os.path.join(
            str(rem),
            "%s.%s%s" % (base, hexlify(tid).decode('ascii'),
                         ZODB.blob.BLOB_SUFFIX)
            )


def _accessed(filename):
    """Set the access time of ``filename`` to now and return ``filename``.

    The modification time is left as it was.  An ``OSError`` from the
    stat or utime call is ignored.
    """
    try:
        os.utime(filename, (time.time(), os.stat(filename).st_mtime))
    except OSError:
        pass  # We tried. :)
    return filename


# Matches names made up of digits only (the cache sub-directories).
cache_file_name = re.compile(r'\d+$').match


def _check_blob_cache_size(blob_dir, target):
    """Remove least recently accessed blob files until the cache fits.

    ``blob_dir`` must carry a layout marker file whose content is
    ``zeocache``; otherwise ``ValueError`` is raised.  ``target`` is the
    total size in bytes that the blob files may occupy.

    Only one checker runs at a time, guarded by ``check_size.lock`` in
    ``blob_dir``.  If the lock cannot be obtained even after waiting one
    second, an empty ``check_size.attempt`` file is left behind and the
    function returns; the running checker rescans when it sees that file.
    """
    logger = logging.getLogger(__name__+'.check_blob_cache')

    with open(os.path.join(blob_dir, ZODB.blob.LAYOUT_MARKER)) as layout_file:
        layout = layout_file.read().strip()

    if layout != 'zeocache':
        logger.critical("Invalid blob directory layout %s", layout)
        raise ValueError("Invalid blob directory layout", layout)

    attempt_path = os.path.join(blob_dir, 'check_size.attempt')
    check_lock_path = os.path.join(blob_dir, 'check_size.lock')

    try:
        check_lock = zc.lockfile.LockFile(check_lock_path)
    except zc.lockfile.LockError:
        try:
            time.sleep(1)
            check_lock = zc.lockfile.LockFile(check_lock_path)
        except zc.lockfile.LockError:
            # Someone is already cleaning up, so don't bother
            logger.debug("%s Another thread is checking the blob cache size.",
                         get_ident())
            with open(attempt_path, 'w'):
                pass  # Mark that we tried
            return

    logger.debug("%s Checking blob cache size. (target: %s)",
                 get_ident(), target)

    try:
        while 1:
            # Scan the cache: total size, and files grouped by access time.
            size = 0
            blob_suffix = ZODB.blob.BLOB_SUFFIX
            files_by_atime = BTrees.OOBTree.BTree()

            for dirname in os.listdir(blob_dir):
                if not cache_file_name(dirname):
                    continue
                base = os.path.join(blob_dir, dirname)
                if not os.path.isdir(base):
                    continue
                for file_name in os.listdir(base):
                    if not file_name.endswith(blob_suffix):
                        continue
                    file_path = os.path.join(base, file_name)
                    if not os.path.isfile(file_path):
                        continue
                    file_stat = os.stat(file_path)
                    size += file_stat.st_size
                    t = file_stat.st_atime
                    if t not in files_by_atime:
                        files_by_atime[t] = []
                    files_by_atime[t].append(os.path.join(dirname, file_name))

            logger.debug("%s blob cache size: %s", get_ident(), size)

            if size <= target:
                if os.path.isfile(attempt_path):
                    # Another checker gave up while we held the lock;
                    # clear its marker and scan once more.
                    try:
                        os.remove(attempt_path)
                    except OSError:
                        pass  # Sigh, windows
                    continue
                logger.debug("%s -->", get_ident())
                break

            # Evict, oldest access time first, until the target is met or
            # there is nothing left to try.
            while size > target and files_by_atime:
                for file_name in files_by_atime.pop(files_by_atime.minKey()):
                    file_name = os.path.join(blob_dir, file_name)
                    lockfilename = os.path.join(os.path.dirname(file_name),
                                                '.lock')
                    try:
                        lock = zc.lockfile.LockFile(lockfilename)
                    except zc.lockfile.LockError:
                        logger.debug("%s Skipping locked %s",
                                     get_ident(),
                                     os.path.basename(file_name))
                        continue  # In use, skip

                    try:
                        fsize = os.stat(file_name).st_size
                        try:
                            ZODB.blob.remove_committed(file_name)
                        except OSError:
                            pass  # probably open on windows
                        else:
                            size -= fsize
                    finally:
                        lock.close()

                    if size <= target:
                        break

            logger.debug("%s reduced blob cache size: %s",
                         get_ident(), size)

    finally:
        check_lock.close()


def check_blob_size_script(args=None):
    """Script entry point: ``args`` is ``[blob_dir, target]``.

    Defaults to ``sys.argv[1:]``; ``target`` is converted with ``int``.
    """
    if args is None:
        args = sys.argv[1:]
    blob_dir, target = args
    _check_blob_cache_size(blob_dir, int(target))


class _FileLock:
    """Auxiliary class to provide for file lock logging."""

    def __init__(self, filename, log_failure):
        """Lock ``filename``; re-raise ``LockError`` if that fails.

        The failure is logged only when ``log_failure`` is true.
        """
        self.filename = filename
        try:
            self.lock = zc.lockfile.LockFile(filename)
            logger.debug("locked %s", filename)
        except zc.lockfile.LockError:
            if log_failure:
                logger.debug("failed to lock %s", filename)
            raise

    def close(self):
        self.lock.close()
        logger.debug("unlocked %s", self.filename)


def _lock_blob(path):
    """Lock the ``.lock`` file next to ``path`` and return the ``_FileLock``.

    Retries every 0.01 seconds; after more than 60000 failed attempts the
    last ``LockError`` is re-raised.  Only the first failure is logged.
    """
    lockfilename = os.path.join(os.path.dirname(path), '.lock')
    n = 0
    while 1:
        try:
            return _FileLock(lockfilename, n == 0)
        except zc.lockfile.LockError:
            time.sleep(0.01)
            n += 1
            if n > 60000:
                raise


def open_cache(cache, var, client, storage, cache_size):
    """Return a client cache object for the given settings.

    ``cache`` is returned untouched unless it is ``None`` or a string.
    A string is used as the first argument to ``ClientCache``.  With
    ``None`` and a truthy ``client``, the file
    ``<var or current directory>/<client>-<storage>.zec`` is used; with
    ``None`` and no ``client``, ``ClientCache(None, cache_size)`` is
    returned.
    """
    if cache is None or isinstance(cache, str):
        from ZEO.cache import ClientCache
        if cache is None:
            if client:
                cache = os.path.join(var or os.getcwd(),
                                     "%s-%s.zec" % (client, storage))
            else:
                # ephemeral cache
                return ClientCache(None, cache_size)

        cache = ClientCache(cache, cache_size)

    return cache