1 : /* Background I/O service for Redis.
2 : *
3 : * This file implements operations that we need to perform in the background.
4 : * Currently there is only a single operation, that is a background close(2)
5 : * system call. This is needed as when the process is the last owner of a
6 : * reference to a file closing it means unlinking it, and the deletion of the
7 : * file is slow, blocking the server.
8 : *
9 : * In the future we'll either continue implementing new things we need or
10 : * we'll switch to libeio. However there are probably long term uses for this
11 : * file as we may want to put here Redis specific background tasks (for instance
12 : * it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL
13 : * implementation).
14 : *
15 : * DESIGN
16 : * ------
17 : *
18 : * The design is trivial, we have a structure representing a job to perform
19 : * and a different thread and job queue for every job type.
20 : * Every thread wait for new jobs in its queue, and process every job
21 : * sequentially.
22 : *
23 : * Jobs of the same type are guaranteed to be processed from the least
24 : * recently inserted to the most recently inserted (older jobs processed
25 : * first).
26 : *
27 : * Currently there is no way for the creator of the job to be notified about
28 : * the completion of the operation, this will only be added when/if needed.
29 : */
30 :
31 : #include "redis.h"
32 : #include "bio.h"
33 :
34 : static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS];
35 : static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS];
36 : static list *bio_jobs[REDIS_BIO_NUM_OPS];
37 : /* The following array is used to hold the number of pending jobs for every
38 : * OP type. This allows us to export the bioPendingJobsOfType() API that is
39 : * useful when the main thread wants to perform some operation that may involve
40 : * objects shared with the background thread. The main thread will just wait
41 : * that there are no longer jobs of this type to be executed before performing
42 : * the sensible operation. This data is also useful for reporting. */
43 : static unsigned long long bio_pending[REDIS_BIO_NUM_OPS];
44 :
45 : /* This structure represents a background Job. It is only used locally to this
46 : * file as the API deos not expose the internals at all. */
47 : struct bio_job {
48 : time_t time; /* Time at which the job was created. */
49 : /* Job specific arguments pointers. If we need to pass more than three
50 : * arguments we can just pass a pointer to a structure or alike. */
51 : void *arg1, *arg2, *arg3;
52 : };
53 :
54 : void *bioProcessBackgroundJobs(void *arg);
55 :
56 : /* Make sure we have enough stack to perform all the things we do in the
57 : * main thread. */
58 : #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
59 :
60 : /* Initialize the background system, spawning the thread. */
61 53 : void bioInit(void) {
62 : pthread_attr_t attr;
63 : pthread_t thread;
64 : size_t stacksize;
65 : int j;
66 :
67 : /* Initialization of state vars and objects */
68 159 : for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
69 106 : pthread_mutex_init(&bio_mutex[j],NULL);
70 106 : pthread_cond_init(&bio_condvar[j],NULL);
71 106 : bio_jobs[j] = listCreate();
72 106 : bio_pending[j] = 0;
73 : }
74 :
75 : /* Set the stack size as by default it may be small in some system */
76 53 : pthread_attr_init(&attr);
77 53 : pthread_attr_getstacksize(&attr,&stacksize);
78 53 : if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */
79 53 : while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
80 53 : pthread_attr_setstacksize(&attr, stacksize);
81 :
82 : /* Ready to spawn our threads. We use the single argument the thread
83 : * function accepts in order to pass the job ID the thread is
84 : * responsible of. */
85 159 : for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
86 106 : void *arg = (void*)(unsigned long) j;
87 106 : if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
88 0 : redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs.");
89 0 : exit(1);
90 : }
91 : }
92 53 : }
93 :
94 19 : void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
95 19 : struct bio_job *job = zmalloc(sizeof(*job));
96 :
97 19 : job->time = time(NULL);
98 19 : job->arg1 = arg1;
99 19 : job->arg2 = arg2;
100 19 : job->arg3 = arg3;
101 19 : pthread_mutex_lock(&bio_mutex[type]);
102 19 : listAddNodeTail(bio_jobs[type],job);
103 19 : bio_pending[type]++;
104 19 : pthread_cond_signal(&bio_condvar[type]);
105 19 : pthread_mutex_unlock(&bio_mutex[type]);
106 19 : }
107 :
108 100 : void *bioProcessBackgroundJobs(void *arg) {
109 : struct bio_job *job;
110 100 : unsigned long type = (unsigned long) arg;
111 : sigset_t sigset;
112 :
113 100 : pthread_detach(pthread_self());
114 105 : pthread_mutex_lock(&bio_mutex[type]);
115 : /* Block SIGALRM so we are sure that only the main thread will
116 : * receive the watchdog signal. */
117 106 : sigemptyset(&sigset);
118 100 : sigaddset(&sigset, SIGALRM);
119 98 : if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
120 0 : redisLog(REDIS_WARNING,
121 : "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
122 :
123 : while(1) {
124 : listNode *ln;
125 :
126 : /* The loop always starts with the lock hold. */
127 144 : if (listLength(bio_jobs[type]) == 0) {
128 125 : pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]);
129 19 : continue;
130 : }
131 : /* Pop the job from the queue. */
132 19 : ln = listFirst(bio_jobs[type]);
133 19 : job = ln->value;
134 : /* It is now possible to unlock the background system as we know have
135 : * a stand alone job structure to process.*/
136 19 : pthread_mutex_unlock(&bio_mutex[type]);
137 :
138 : /* Process the job accordingly to its type. */
139 19 : if (type == REDIS_BIO_CLOSE_FILE) {
140 17 : close((long)job->arg1);
141 2 : } else if (type == REDIS_BIO_AOF_FSYNC) {
142 2 : aof_fsync((long)job->arg1);
143 : } else {
144 0 : redisPanic("Wrong job type in bioProcessBackgroundJobs().");
145 : }
146 19 : zfree(job);
147 :
148 : /* Lock again before reiterating the loop, if there are no longer
149 : * jobs to process we'll block again in pthread_cond_wait(). */
150 19 : pthread_mutex_lock(&bio_mutex[type]);
151 19 : listDelNode(bio_jobs[type],ln);
152 19 : bio_pending[type]--;
153 : }
154 : }
155 :
156 : /* Return the number of pending jobs of the specified type. */
157 6 : unsigned long long bioPendingJobsOfType(int type) {
158 : unsigned long long val;
159 6 : pthread_mutex_lock(&bio_mutex[type]);
160 6 : val = bio_pending[type];
161 6 : pthread_mutex_unlock(&bio_mutex[type]);
162 6 : return val;
163 : }
164 :
165 : #if 0 /* We don't use the following code for now, and bioWaitPendingJobsLE
166 : probably needs a rewrite using conditional variables instead of the
167 : current implementation. */
168 :
169 :
170 : /* Wait until the number of pending jobs of the specified type are
171 : * less or equal to the specified number.
172 : *
173 : * This function may block for long time, it should only be used to perform
174 : * the following tasks:
175 : *
176 : * 1) To avoid that the main thread is pushing jobs of a given time so fast
177 : * that the background thread can't process them at the same speed.
178 : * So before creating a new job of a given type the main thread should
179 : * call something like: bioWaitPendingJobsLE(job_type,10000);
180 : * 2) In order to perform special operations that make it necessary to be sure
181 : * no one is touching shared resourced in the background.
182 : */
183 : void bioWaitPendingJobsLE(int type, unsigned long long num) {
184 : unsigned long long iteration = 0;
185 :
186 : /* We poll the jobs queue aggressively to start, and gradually relax
187 : * the polling speed if it is going to take too much time. */
188 : while(1) {
189 : iteration++;
190 : if (iteration > 1000 && iteration <= 10000) {
191 : usleep(100);
192 : } else if (iteration > 10000) {
193 : usleep(1000);
194 : }
195 : if (bioPendingJobsOfType(type) <= num) break;
196 : }
197 : }
198 :
199 : /* Return the older job of the specified type. */
200 : time_t bioOlderJobOfType(int type) {
201 : time_t time;
202 : listNode *ln;
203 : struct bio_job *job;
204 :
205 : pthread_mutex_lock(&bio_mutex[type]);
206 : ln = listFirst(bio_jobs[type]);
207 : if (ln == NULL) {
208 : pthread_mutex_unlock(&bio_mutex[type]);
209 : return 0;
210 : }
211 : job = ln->value;
212 : time = job->time;
213 : pthread_mutex_unlock(&bio_mutex[type]);
214 : return time;
215 : }
216 :
217 : #endif
|