// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
 */

/*
 * UBI wear-leveling sub-system.
 *
 * This sub-system is responsible for wear-leveling. It works in terms of
 * physical eraseblocks and erase counters and knows nothing about logical
 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
 * eraseblocks are of two types - used and free. Used physical eraseblocks are
 * those that were obtained by the 'ubi_wl_get_peb()' function, and free
 * physical eraseblocks are those that were put by the 'ubi_wl_put_peb()'
 * function.
 *
 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
 * header. The rest of the physical eraseblock contains only %0xFF bytes.
 *
 * When physical eraseblocks are returned to the WL sub-system by means of the
 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
 * done asynchronously in context of the per-UBI device background thread,
 * which is also managed by the WL sub-system.
 *
 * The wear-leveling is ensured by means of moving the contents of used
 * physical eraseblocks with low erase counter to free physical eraseblocks
 * with high erase counter.
 *
 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
 * bad.
 *
 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
 * in a physical eraseblock, it has to be moved. Technically this is the same
 * as moving it for wear-leveling reasons.
 *
 * As it was said, for the UBI sub-system all physical eraseblocks are either
 * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
 * RB-trees, as well as (temporarily) in the @wl->pq queue.
 *
 * When the WL sub-system returns a physical eraseblock, the physical
 * eraseblock is protected from being moved for some "time". For this reason,
 * the physical eraseblock is not directly moved from the @wl->free tree to the
 * @wl->used tree. There is a protection queue in between where this
 * physical eraseblock is temporarily stored (@wl->pq).
 *
 * All this protection stuff is needed because:
 *  o we don't want to move physical eraseblocks just after we have given them
 *    to the user; instead, we first want to let users fill them up with data;
 *
 *  o there is a chance that the user will put the physical eraseblock very
 *    soon, so it makes sense not to move it for some time, but wait.
 *
 * Physical eraseblocks stay protected only for limited time. But the "time" is
 * measured in erase cycles in this case. This is implemented with help of the
 * protection queue. Eraseblocks are put to the tail of this queue when they
 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
 * head of the queue on each erase operation (for any eraseblock). So the
 * length of the queue defines how many (global) erase cycles PEBs are
 * protected.
 *
 * To put it differently, each physical eraseblock has 2 main states: free and
 * used. The former state corresponds to the @wl->free tree. The latter state
 * is split up into several sub-states:
 * o the WL movement is allowed (@wl->used tree);
 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
 *   erroneous - e.g., there was a read error;
 * o the WL movement is temporarily prohibited (@wl->pq queue);
 * o scrubbing is needed (@wl->scrub tree).
 *
 * Depending on the sub-state, wear-leveling entries of the used physical
 * eraseblocks may be kept in one of those structures.
 *
 * Note, in this implementation, we keep a small in-RAM object for each physical
 * eraseblock. This is surely not a scalable solution. But it appears to be good
 * enough for moderately large flashes and it is simple. In future, one may
 * re-work this sub-system and make it more scalable.
 *
 * At the moment this sub-system does not utilize the sequence number, which
 * was introduced relatively recently. But it would be wise to do this because
 * the sequence number of a logical eraseblock characterizes how old it is. For
 * example, when we move a PEB with low erase counter, and we need to pick the
 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
 * pick target PEB with an average EC if our PEB is not very "old". This leaves
 * room for future re-works of the WL sub-system.
 */

#ifndef __UBOOT__
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#else
#include <ubi_uboot.h>
#endif

#include "ubi.h"
#include "wl.h"

/* Number of physical eraseblocks reserved for wear-leveling purposes */
#define WL_RESERVED_PEBS 1

/*
 * Maximum difference between two erase counters. If this threshold is
 * exceeded, the WL sub-system starts moving data from used physical
 * eraseblocks with low erase counter to free physical eraseblocks with high
 * erase counter.
 */
#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD

/*
 * When a physical eraseblock is moved, the WL sub-system has to pick the target
 * physical eraseblock to move to. The simplest way would be just to pick the
 * one with the highest erase counter. But in certain workloads this could lead
 * to an unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
 * situation when the picked physical eraseblock is constantly erased after the
 * data is written to it. So, we have a constant which limits the highest erase
 * counter of the free physical eraseblock to pick.
Namely, the WL sub-system 119 * does not pick eraseblocks with erase counter greater than the lowest erase 120 * counter plus %WL_FREE_MAX_DIFF. 121 */ 122 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) 123 124 /* 125 * Maximum number of consecutive background thread failures which is enough to 126 * switch to read-only mode. 127 */ 128 #define WL_MAX_FAILURES 32 129 130 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec); 131 static int self_check_in_wl_tree(const struct ubi_device *ubi, 132 struct ubi_wl_entry *e, struct rb_root *root); 133 static int self_check_in_pq(const struct ubi_device *ubi, 134 struct ubi_wl_entry *e); 135 136 /** 137 * wl_tree_add - add a wear-leveling entry to a WL RB-tree. 138 * @e: the wear-leveling entry to add 139 * @root: the root of the tree 140 * 141 * Note, we use (erase counter, physical eraseblock number) pairs as keys in 142 * the @ubi->used and @ubi->free RB-trees. 143 */ 144 static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) 145 { 146 struct rb_node **p, *parent = NULL; 147 148 p = &root->rb_node; 149 while (*p) { 150 struct ubi_wl_entry *e1; 151 152 parent = *p; 153 e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); 154 155 if (e->ec < e1->ec) 156 p = &(*p)->rb_left; 157 else if (e->ec > e1->ec) 158 p = &(*p)->rb_right; 159 else { 160 ubi_assert(e->pnum != e1->pnum); 161 if (e->pnum < e1->pnum) 162 p = &(*p)->rb_left; 163 else 164 p = &(*p)->rb_right; 165 } 166 } 167 168 rb_link_node(&e->u.rb, parent, p); 169 rb_insert_color(&e->u.rb, root); 170 } 171 172 /** 173 * wl_tree_destroy - destroy a wear-leveling entry. 174 * @ubi: UBI device description object 175 * @e: the wear-leveling entry to add 176 * 177 * This function destroys a wear leveling entry and removes 178 * the reference from the lookup table. 
179 */ 180 static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e) 181 { 182 ubi->lookuptbl[e->pnum] = NULL; 183 kmem_cache_free(ubi_wl_entry_slab, e); 184 } 185 186 /** 187 * do_work - do one pending work. 188 * @ubi: UBI device description object 189 * 190 * This function returns zero in case of success and a negative error code in 191 * case of failure. 192 */ 193 static int do_work(struct ubi_device *ubi) 194 { 195 int err; 196 struct ubi_work *wrk; 197 198 cond_resched(); 199 200 /* 201 * @ubi->work_sem is used to synchronize with the workers. Workers take 202 * it in read mode, so many of them may be doing works at a time. But 203 * the queue flush code has to be sure the whole queue of works is 204 * done, and it takes the mutex in write mode. 205 */ 206 down_read(&ubi->work_sem); 207 spin_lock(&ubi->wl_lock); 208 if (list_empty(&ubi->works)) { 209 spin_unlock(&ubi->wl_lock); 210 up_read(&ubi->work_sem); 211 return 0; 212 } 213 214 wrk = list_entry(ubi->works.next, struct ubi_work, list); 215 list_del(&wrk->list); 216 ubi->works_count -= 1; 217 ubi_assert(ubi->works_count >= 0); 218 spin_unlock(&ubi->wl_lock); 219 220 /* 221 * Call the worker function. Do not touch the work structure 222 * after this call as it will have been freed or reused by that 223 * time by the worker function. 224 */ 225 err = wrk->func(ubi, wrk, 0); 226 if (err) 227 ubi_err(ubi, "work failed with error code %d", err); 228 up_read(&ubi->work_sem); 229 230 return err; 231 } 232 233 /** 234 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. 235 * @e: the wear-leveling entry to check 236 * @root: the root of the tree 237 * 238 * This function returns non-zero if @e is in the @root RB-tree and zero if it 239 * is not. 
240 */ 241 static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) 242 { 243 struct rb_node *p; 244 245 p = root->rb_node; 246 while (p) { 247 struct ubi_wl_entry *e1; 248 249 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 250 251 if (e->pnum == e1->pnum) { 252 ubi_assert(e == e1); 253 return 1; 254 } 255 256 if (e->ec < e1->ec) 257 p = p->rb_left; 258 else if (e->ec > e1->ec) 259 p = p->rb_right; 260 else { 261 ubi_assert(e->pnum != e1->pnum); 262 if (e->pnum < e1->pnum) 263 p = p->rb_left; 264 else 265 p = p->rb_right; 266 } 267 } 268 269 return 0; 270 } 271 272 /** 273 * prot_queue_add - add physical eraseblock to the protection queue. 274 * @ubi: UBI device description object 275 * @e: the physical eraseblock to add 276 * 277 * This function adds @e to the tail of the protection queue @ubi->pq, where 278 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be 279 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to 280 * be locked. 281 */ 282 static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) 283 { 284 int pq_tail = ubi->pq_head - 1; 285 286 if (pq_tail < 0) 287 pq_tail = UBI_PROT_QUEUE_LEN - 1; 288 ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); 289 list_add_tail(&e->u.list, &ubi->pq[pq_tail]); 290 dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); 291 } 292 293 /** 294 * find_wl_entry - find wear-leveling entry closest to certain erase counter. 295 * @ubi: UBI device description object 296 * @root: the RB-tree where to look for 297 * @diff: maximum possible difference from the smallest erase counter 298 * 299 * This function looks for a wear leveling entry with erase counter closest to 300 * min + @diff, where min is the smallest erase counter. 
301 */ 302 static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, 303 struct rb_root *root, int diff) 304 { 305 struct rb_node *p; 306 struct ubi_wl_entry *e, *prev_e = NULL; 307 int max; 308 309 e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 310 max = e->ec + diff; 311 312 p = root->rb_node; 313 while (p) { 314 struct ubi_wl_entry *e1; 315 316 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 317 if (e1->ec >= max) 318 p = p->rb_left; 319 else { 320 p = p->rb_right; 321 prev_e = e; 322 e = e1; 323 } 324 } 325 326 /* If no fastmap has been written and this WL entry can be used 327 * as anchor PEB, hold it back and return the second best WL entry 328 * such that fastmap can use the anchor PEB later. */ 329 if (prev_e && !ubi->fm_disabled && 330 !ubi->fm && e->pnum < UBI_FM_MAX_START) 331 return prev_e; 332 333 return e; 334 } 335 336 /** 337 * find_mean_wl_entry - find wear-leveling entry with medium erase counter. 338 * @ubi: UBI device description object 339 * @root: the RB-tree where to look for 340 * 341 * This function looks for a wear leveling entry with medium erase counter, 342 * but not greater or equivalent than the lowest erase counter plus 343 * %WL_FREE_MAX_DIFF/2. 344 */ 345 static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, 346 struct rb_root *root) 347 { 348 struct ubi_wl_entry *e, *first, *last; 349 350 first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 351 last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); 352 353 if (last->ec - first->ec < WL_FREE_MAX_DIFF) { 354 e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); 355 356 /* If no fastmap has been written and this WL entry can be used 357 * as anchor PEB, hold it back and return the second best 358 * WL entry such that fastmap can use the anchor PEB later. 
*/ 359 e = may_reserve_for_fm(ubi, e, root); 360 } else 361 e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); 362 363 return e; 364 } 365 366 /** 367 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or 368 * refill_wl_user_pool(). 369 * @ubi: UBI device description object 370 * 371 * This function returns a a wear leveling entry in case of success and 372 * NULL in case of failure. 373 */ 374 static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi) 375 { 376 struct ubi_wl_entry *e; 377 378 e = find_mean_wl_entry(ubi, &ubi->free); 379 if (!e) { 380 ubi_err(ubi, "no free eraseblocks"); 381 return NULL; 382 } 383 384 self_check_in_wl_tree(ubi, e, &ubi->free); 385 386 /* 387 * Move the physical eraseblock to the protection queue where it will 388 * be protected from being moved for some time. 389 */ 390 rb_erase(&e->u.rb, &ubi->free); 391 ubi->free_count--; 392 dbg_wl("PEB %d EC %d", e->pnum, e->ec); 393 394 return e; 395 } 396 397 /** 398 * prot_queue_del - remove a physical eraseblock from the protection queue. 399 * @ubi: UBI device description object 400 * @pnum: the physical eraseblock to remove 401 * 402 * This function deletes PEB @pnum from the protection queue and returns zero 403 * in case of success and %-ENODEV if the PEB was not found. 404 */ 405 static int prot_queue_del(struct ubi_device *ubi, int pnum) 406 { 407 struct ubi_wl_entry *e; 408 409 e = ubi->lookuptbl[pnum]; 410 if (!e) 411 return -ENODEV; 412 413 if (self_check_in_pq(ubi, e)) 414 return -ENODEV; 415 416 list_del(&e->u.list); 417 dbg_wl("deleted PEB %d from the protection queue", e->pnum); 418 return 0; 419 } 420 421 /** 422 * sync_erase - synchronously erase a physical eraseblock. 423 * @ubi: UBI device description object 424 * @e: the the physical eraseblock to erase 425 * @torture: if the physical eraseblock has to be tortured 426 * 427 * This function returns zero in case of success and a negative error code in 428 * case of failure. 
429 */ 430 static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 431 int torture) 432 { 433 int err; 434 struct ubi_ec_hdr *ec_hdr; 435 unsigned long long ec = e->ec; 436 437 dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); 438 439 err = self_check_ec(ubi, e->pnum, e->ec); 440 if (err) 441 return -EINVAL; 442 443 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 444 if (!ec_hdr) 445 return -ENOMEM; 446 447 err = ubi_io_sync_erase(ubi, e->pnum, torture); 448 if (err < 0) 449 goto out_free; 450 451 ec += err; 452 if (ec > UBI_MAX_ERASECOUNTER) { 453 /* 454 * Erase counter overflow. Upgrade UBI and use 64-bit 455 * erase counters internally. 456 */ 457 ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu", 458 e->pnum, ec); 459 err = -EINVAL; 460 goto out_free; 461 } 462 463 dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); 464 465 ec_hdr->ec = cpu_to_be64(ec); 466 467 err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); 468 if (err) 469 goto out_free; 470 471 e->ec = ec; 472 spin_lock(&ubi->wl_lock); 473 if (e->ec > ubi->max_ec) 474 ubi->max_ec = e->ec; 475 spin_unlock(&ubi->wl_lock); 476 477 out_free: 478 kfree(ec_hdr); 479 return err; 480 } 481 482 /** 483 * serve_prot_queue - check if it is time to stop protecting PEBs. 484 * @ubi: UBI device description object 485 * 486 * This function is called after each erase operation and removes PEBs from the 487 * tail of the protection queue. These PEBs have been protected for long enough 488 * and should be moved to the used tree. 489 */ 490 static void serve_prot_queue(struct ubi_device *ubi) 491 { 492 struct ubi_wl_entry *e, *tmp; 493 int count; 494 495 /* 496 * There may be several protected physical eraseblock to remove, 497 * process them all. 
498 */ 499 repeat: 500 count = 0; 501 spin_lock(&ubi->wl_lock); 502 list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { 503 dbg_wl("PEB %d EC %d protection over, move to used tree", 504 e->pnum, e->ec); 505 506 list_del(&e->u.list); 507 wl_tree_add(e, &ubi->used); 508 if (count++ > 32) { 509 /* 510 * Let's be nice and avoid holding the spinlock for 511 * too long. 512 */ 513 spin_unlock(&ubi->wl_lock); 514 cond_resched(); 515 goto repeat; 516 } 517 } 518 519 ubi->pq_head += 1; 520 if (ubi->pq_head == UBI_PROT_QUEUE_LEN) 521 ubi->pq_head = 0; 522 ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); 523 spin_unlock(&ubi->wl_lock); 524 } 525 526 #ifdef __UBOOT__ 527 void ubi_do_worker(struct ubi_device *ubi) 528 { 529 int err; 530 531 if (list_empty(&ubi->works) || ubi->ro_mode || 532 !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) 533 return; 534 535 spin_lock(&ubi->wl_lock); 536 while (!list_empty(&ubi->works)) { 537 /* 538 * call do_work, which executes exactly one work form the queue, 539 * including removeing it from the work queue. 540 */ 541 spin_unlock(&ubi->wl_lock); 542 err = do_work(ubi); 543 spin_lock(&ubi->wl_lock); 544 if (err) { 545 ubi_err(ubi, "%s: work failed with error code %d", 546 ubi->bgt_name, err); 547 } 548 } 549 spin_unlock(&ubi->wl_lock); 550 } 551 #endif 552 553 /** 554 * __schedule_ubi_work - schedule a work. 555 * @ubi: UBI device description object 556 * @wrk: the work to schedule 557 * 558 * This function adds a work defined by @wrk to the tail of the pending works 559 * list. Can only be used if ubi->work_sem is already held in read mode! 
560 */ 561 static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 562 { 563 spin_lock(&ubi->wl_lock); 564 list_add_tail(&wrk->list, &ubi->works); 565 ubi_assert(ubi->works_count >= 0); 566 ubi->works_count += 1; 567 #ifndef __UBOOT__ 568 if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) 569 wake_up_process(ubi->bgt_thread); 570 #endif 571 spin_unlock(&ubi->wl_lock); 572 } 573 574 /** 575 * schedule_ubi_work - schedule a work. 576 * @ubi: UBI device description object 577 * @wrk: the work to schedule 578 * 579 * This function adds a work defined by @wrk to the tail of the pending works 580 * list. 581 */ 582 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 583 { 584 down_read(&ubi->work_sem); 585 __schedule_ubi_work(ubi, wrk); 586 up_read(&ubi->work_sem); 587 } 588 589 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 590 int shutdown); 591 592 /** 593 * schedule_erase - schedule an erase work. 594 * @ubi: UBI device description object 595 * @e: the WL entry of the physical eraseblock to erase 596 * @vol_id: the volume ID that last used this PEB 597 * @lnum: the last used logical eraseblock number for the PEB 598 * @torture: if the physical eraseblock has to be tortured 599 * 600 * This function returns zero in case of success and a %-ENOMEM in case of 601 * failure. 
602 */ 603 static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 604 int vol_id, int lnum, int torture) 605 { 606 struct ubi_work *wl_wrk; 607 608 ubi_assert(e); 609 610 dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", 611 e->pnum, e->ec, torture); 612 613 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 614 if (!wl_wrk) 615 return -ENOMEM; 616 617 wl_wrk->func = &erase_worker; 618 wl_wrk->e = e; 619 wl_wrk->vol_id = vol_id; 620 wl_wrk->lnum = lnum; 621 wl_wrk->torture = torture; 622 623 schedule_ubi_work(ubi, wl_wrk); 624 625 #ifdef __UBOOT__ 626 ubi_do_worker(ubi); 627 #endif 628 return 0; 629 } 630 631 /** 632 * do_sync_erase - run the erase worker synchronously. 633 * @ubi: UBI device description object 634 * @e: the WL entry of the physical eraseblock to erase 635 * @vol_id: the volume ID that last used this PEB 636 * @lnum: the last used logical eraseblock number for the PEB 637 * @torture: if the physical eraseblock has to be tortured 638 * 639 */ 640 static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 641 int vol_id, int lnum, int torture) 642 { 643 struct ubi_work *wl_wrk; 644 645 dbg_wl("sync erase of PEB %i", e->pnum); 646 647 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 648 if (!wl_wrk) 649 return -ENOMEM; 650 651 wl_wrk->e = e; 652 wl_wrk->vol_id = vol_id; 653 wl_wrk->lnum = lnum; 654 wl_wrk->torture = torture; 655 656 return erase_worker(ubi, wl_wrk, 0); 657 } 658 659 /** 660 * wear_leveling_worker - wear-leveling worker function. 661 * @ubi: UBI device description object 662 * @wrk: the work object 663 * @shutdown: non-zero if the worker has to free memory and exit 664 * because the WL-subsystem is shutting down 665 * 666 * This function copies a more worn out physical eraseblock to a less worn out 667 * one. Returns zero in case of success and a negative error code in case of 668 * failure. 
669 */ 670 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, 671 int shutdown) 672 { 673 int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; 674 int vol_id = -1, lnum = -1; 675 #ifdef CONFIG_MTD_UBI_FASTMAP 676 int anchor = wrk->anchor; 677 #endif 678 struct ubi_wl_entry *e1, *e2; 679 struct ubi_vid_hdr *vid_hdr; 680 681 kfree(wrk); 682 if (shutdown) 683 return 0; 684 685 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 686 if (!vid_hdr) 687 return -ENOMEM; 688 689 mutex_lock(&ubi->move_mutex); 690 spin_lock(&ubi->wl_lock); 691 ubi_assert(!ubi->move_from && !ubi->move_to); 692 ubi_assert(!ubi->move_to_put); 693 694 if (!ubi->free.rb_node || 695 (!ubi->used.rb_node && !ubi->scrub.rb_node)) { 696 /* 697 * No free physical eraseblocks? Well, they must be waiting in 698 * the queue to be erased. Cancel movement - it will be 699 * triggered again when a free physical eraseblock appears. 700 * 701 * No used physical eraseblocks? They must be temporarily 702 * protected from being moved. They will be moved to the 703 * @ubi->used tree later and the wear-leveling will be 704 * triggered again. 705 */ 706 dbg_wl("cancel WL, a list is empty: free %d, used %d", 707 !ubi->free.rb_node, !ubi->used.rb_node); 708 goto out_cancel; 709 } 710 711 #ifdef CONFIG_MTD_UBI_FASTMAP 712 /* Check whether we need to produce an anchor PEB */ 713 if (!anchor) 714 anchor = !anchor_pebs_avalible(&ubi->free); 715 716 if (anchor) { 717 e1 = find_anchor_wl_entry(&ubi->used); 718 if (!e1) 719 goto out_cancel; 720 e2 = get_peb_for_wl(ubi); 721 if (!e2) 722 goto out_cancel; 723 724 self_check_in_wl_tree(ubi, e1, &ubi->used); 725 rb_erase(&e1->u.rb, &ubi->used); 726 dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); 727 } else if (!ubi->scrub.rb_node) { 728 #else 729 if (!ubi->scrub.rb_node) { 730 #endif 731 /* 732 * Now pick the least worn-out used physical eraseblock and a 733 * highly worn-out free physical eraseblock. 
If the erase 734 * counters differ much enough, start wear-leveling. 735 */ 736 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 737 e2 = get_peb_for_wl(ubi); 738 if (!e2) 739 goto out_cancel; 740 741 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { 742 dbg_wl("no WL needed: min used EC %d, max free EC %d", 743 e1->ec, e2->ec); 744 745 /* Give the unused PEB back */ 746 wl_tree_add(e2, &ubi->free); 747 ubi->free_count++; 748 goto out_cancel; 749 } 750 self_check_in_wl_tree(ubi, e1, &ubi->used); 751 rb_erase(&e1->u.rb, &ubi->used); 752 dbg_wl("move PEB %d EC %d to PEB %d EC %d", 753 e1->pnum, e1->ec, e2->pnum, e2->ec); 754 } else { 755 /* Perform scrubbing */ 756 scrubbing = 1; 757 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); 758 e2 = get_peb_for_wl(ubi); 759 if (!e2) 760 goto out_cancel; 761 762 self_check_in_wl_tree(ubi, e1, &ubi->scrub); 763 rb_erase(&e1->u.rb, &ubi->scrub); 764 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); 765 } 766 767 ubi->move_from = e1; 768 ubi->move_to = e2; 769 spin_unlock(&ubi->wl_lock); 770 771 /* 772 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. 773 * We so far do not know which logical eraseblock our physical 774 * eraseblock (@e1) belongs to. We have to read the volume identifier 775 * header first. 776 * 777 * Note, we are protected from this PEB being unmapped and erased. The 778 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB 779 * which is being moved was unmapped. 780 */ 781 782 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); 783 if (err && err != UBI_IO_BITFLIPS) { 784 if (err == UBI_IO_FF) { 785 /* 786 * We are trying to move PEB without a VID header. UBI 787 * always write VID headers shortly after the PEB was 788 * given, so we have a situation when it has not yet 789 * had a chance to write it, because it was preempted. 
790 * So add this PEB to the protection queue so far, 791 * because presumably more data will be written there 792 * (including the missing VID header), and then we'll 793 * move it. 794 */ 795 dbg_wl("PEB %d has no VID header", e1->pnum); 796 protect = 1; 797 goto out_not_moved; 798 } else if (err == UBI_IO_FF_BITFLIPS) { 799 /* 800 * The same situation as %UBI_IO_FF, but bit-flips were 801 * detected. It is better to schedule this PEB for 802 * scrubbing. 803 */ 804 dbg_wl("PEB %d has no VID header but has bit-flips", 805 e1->pnum); 806 scrubbing = 1; 807 goto out_not_moved; 808 } 809 810 ubi_err(ubi, "error %d while reading VID header from PEB %d", 811 err, e1->pnum); 812 goto out_error; 813 } 814 815 vol_id = be32_to_cpu(vid_hdr->vol_id); 816 lnum = be32_to_cpu(vid_hdr->lnum); 817 818 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); 819 if (err) { 820 if (err == MOVE_CANCEL_RACE) { 821 /* 822 * The LEB has not been moved because the volume is 823 * being deleted or the PEB has been put meanwhile. We 824 * should prevent this PEB from being selected for 825 * wear-leveling movement again, so put it to the 826 * protection queue. 827 */ 828 protect = 1; 829 goto out_not_moved; 830 } 831 if (err == MOVE_RETRY) { 832 scrubbing = 1; 833 goto out_not_moved; 834 } 835 if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || 836 err == MOVE_TARGET_RD_ERR) { 837 /* 838 * Target PEB had bit-flips or write error - torture it. 839 */ 840 torture = 1; 841 goto out_not_moved; 842 } 843 844 if (err == MOVE_SOURCE_RD_ERR) { 845 /* 846 * An error happened while reading the source PEB. Do 847 * not switch to R/O mode in this case, and give the 848 * upper layers a possibility to recover from this, 849 * e.g. by unmapping corresponding LEB. Instead, just 850 * put this PEB to the @ubi->erroneous list to prevent 851 * UBI from trying to move it over and over again. 
852 */ 853 if (ubi->erroneous_peb_count > ubi->max_erroneous) { 854 ubi_err(ubi, "too many erroneous eraseblocks (%d)", 855 ubi->erroneous_peb_count); 856 goto out_error; 857 } 858 erroneous = 1; 859 goto out_not_moved; 860 } 861 862 if (err < 0) 863 goto out_error; 864 865 ubi_assert(0); 866 } 867 868 /* The PEB has been successfully moved */ 869 if (scrubbing) 870 ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", 871 e1->pnum, vol_id, lnum, e2->pnum); 872 ubi_free_vid_hdr(ubi, vid_hdr); 873 874 spin_lock(&ubi->wl_lock); 875 if (!ubi->move_to_put) { 876 wl_tree_add(e2, &ubi->used); 877 e2 = NULL; 878 } 879 ubi->move_from = ubi->move_to = NULL; 880 ubi->move_to_put = ubi->wl_scheduled = 0; 881 spin_unlock(&ubi->wl_lock); 882 883 err = do_sync_erase(ubi, e1, vol_id, lnum, 0); 884 if (err) { 885 if (e2) 886 wl_entry_destroy(ubi, e2); 887 goto out_ro; 888 } 889 890 if (e2) { 891 /* 892 * Well, the target PEB was put meanwhile, schedule it for 893 * erasure. 894 */ 895 dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", 896 e2->pnum, vol_id, lnum); 897 err = do_sync_erase(ubi, e2, vol_id, lnum, 0); 898 if (err) 899 goto out_ro; 900 } 901 902 dbg_wl("done"); 903 mutex_unlock(&ubi->move_mutex); 904 return 0; 905 906 /* 907 * For some reasons the LEB was not moved, might be an error, might be 908 * something else. @e1 was not changed, so return it back. @e2 might 909 * have been changed, schedule it for erasure. 
910 */ 911 out_not_moved: 912 if (vol_id != -1) 913 dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", 914 e1->pnum, vol_id, lnum, e2->pnum, err); 915 else 916 dbg_wl("cancel moving PEB %d to PEB %d (%d)", 917 e1->pnum, e2->pnum, err); 918 spin_lock(&ubi->wl_lock); 919 if (protect) 920 prot_queue_add(ubi, e1); 921 else if (erroneous) { 922 wl_tree_add(e1, &ubi->erroneous); 923 ubi->erroneous_peb_count += 1; 924 } else if (scrubbing) 925 wl_tree_add(e1, &ubi->scrub); 926 else 927 wl_tree_add(e1, &ubi->used); 928 ubi_assert(!ubi->move_to_put); 929 ubi->move_from = ubi->move_to = NULL; 930 ubi->wl_scheduled = 0; 931 spin_unlock(&ubi->wl_lock); 932 933 ubi_free_vid_hdr(ubi, vid_hdr); 934 err = do_sync_erase(ubi, e2, vol_id, lnum, torture); 935 if (err) 936 goto out_ro; 937 938 mutex_unlock(&ubi->move_mutex); 939 return 0; 940 941 out_error: 942 if (vol_id != -1) 943 ubi_err(ubi, "error %d while moving PEB %d to PEB %d", 944 err, e1->pnum, e2->pnum); 945 else 946 ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d", 947 err, e1->pnum, vol_id, lnum, e2->pnum); 948 spin_lock(&ubi->wl_lock); 949 ubi->move_from = ubi->move_to = NULL; 950 ubi->move_to_put = ubi->wl_scheduled = 0; 951 spin_unlock(&ubi->wl_lock); 952 953 ubi_free_vid_hdr(ubi, vid_hdr); 954 wl_entry_destroy(ubi, e1); 955 wl_entry_destroy(ubi, e2); 956 957 out_ro: 958 ubi_ro_mode(ubi); 959 mutex_unlock(&ubi->move_mutex); 960 ubi_assert(err != 0); 961 return err < 0 ? err : -EIO; 962 963 out_cancel: 964 ubi->wl_scheduled = 0; 965 spin_unlock(&ubi->wl_lock); 966 mutex_unlock(&ubi->move_mutex); 967 ubi_free_vid_hdr(ubi, vid_hdr); 968 return 0; 969 } 970 971 /** 972 * ensure_wear_leveling - schedule wear-leveling if it is needed. 973 * @ubi: UBI device description object 974 * @nested: set to non-zero if this function is called from UBI worker 975 * 976 * This function checks if it is time to start wear-leveling and schedules it 977 * if yes. 
This function returns zero in case of success and a negative error 978 * code in case of failure. 979 */ 980 static int ensure_wear_leveling(struct ubi_device *ubi, int nested) 981 { 982 int err = 0; 983 struct ubi_wl_entry *e1; 984 struct ubi_wl_entry *e2; 985 struct ubi_work *wrk; 986 987 spin_lock(&ubi->wl_lock); 988 if (ubi->wl_scheduled) 989 /* Wear-leveling is already in the work queue */ 990 goto out_unlock; 991 992 /* 993 * If the ubi->scrub tree is not empty, scrubbing is needed, and the 994 * the WL worker has to be scheduled anyway. 995 */ 996 if (!ubi->scrub.rb_node) { 997 if (!ubi->used.rb_node || !ubi->free.rb_node) 998 /* No physical eraseblocks - no deal */ 999 goto out_unlock; 1000 1001 /* 1002 * We schedule wear-leveling only if the difference between the 1003 * lowest erase counter of used physical eraseblocks and a high 1004 * erase counter of free physical eraseblocks is greater than 1005 * %UBI_WL_THRESHOLD. 1006 */ 1007 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 1008 e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 1009 1010 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) 1011 goto out_unlock; 1012 dbg_wl("schedule wear-leveling"); 1013 } else 1014 dbg_wl("schedule scrubbing"); 1015 1016 ubi->wl_scheduled = 1; 1017 spin_unlock(&ubi->wl_lock); 1018 1019 wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 1020 if (!wrk) { 1021 err = -ENOMEM; 1022 goto out_cancel; 1023 } 1024 1025 wrk->anchor = 0; 1026 wrk->func = &wear_leveling_worker; 1027 if (nested) 1028 __schedule_ubi_work(ubi, wrk); 1029 #ifndef __UBOOT__ 1030 else 1031 schedule_ubi_work(ubi, wrk); 1032 #else 1033 else { 1034 schedule_ubi_work(ubi, wrk); 1035 ubi_do_worker(ubi); 1036 } 1037 #endif 1038 return err; 1039 1040 out_cancel: 1041 spin_lock(&ubi->wl_lock); 1042 ubi->wl_scheduled = 0; 1043 out_unlock: 1044 spin_unlock(&ubi->wl_lock); 1045 return err; 1046 } 1047 1048 /** 1049 * erase_worker - physical eraseblock erase worker function. 
1050 * @ubi: UBI device description object 1051 * @wl_wrk: the work object 1052 * @shutdown: non-zero if the worker has to free memory and exit 1053 * because the WL sub-system is shutting down 1054 * 1055 * This function erases a physical eraseblock and perform torture testing if 1056 * needed. It also takes care about marking the physical eraseblock bad if 1057 * needed. Returns zero in case of success and a negative error code in case of 1058 * failure. 1059 */ 1060 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 1061 int shutdown) 1062 { 1063 struct ubi_wl_entry *e = wl_wrk->e; 1064 int pnum = e->pnum; 1065 int vol_id = wl_wrk->vol_id; 1066 int lnum = wl_wrk->lnum; 1067 int err, available_consumed = 0; 1068 1069 if (shutdown) { 1070 dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); 1071 kfree(wl_wrk); 1072 wl_entry_destroy(ubi, e); 1073 return 0; 1074 } 1075 1076 dbg_wl("erase PEB %d EC %d LEB %d:%d", 1077 pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); 1078 1079 err = sync_erase(ubi, e, wl_wrk->torture); 1080 if (!err) { 1081 /* Fine, we've erased it successfully */ 1082 kfree(wl_wrk); 1083 1084 spin_lock(&ubi->wl_lock); 1085 wl_tree_add(e, &ubi->free); 1086 ubi->free_count++; 1087 spin_unlock(&ubi->wl_lock); 1088 1089 /* 1090 * One more erase operation has happened, take care about 1091 * protected physical eraseblocks. 
1092 */ 1093 serve_prot_queue(ubi); 1094 1095 /* And take care about wear-leveling */ 1096 err = ensure_wear_leveling(ubi, 1); 1097 return err; 1098 } 1099 1100 ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err); 1101 kfree(wl_wrk); 1102 1103 if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || 1104 err == -EBUSY) { 1105 int err1; 1106 1107 /* Re-schedule the LEB for erasure */ 1108 err1 = schedule_erase(ubi, e, vol_id, lnum, 0); 1109 if (err1) { 1110 err = err1; 1111 goto out_ro; 1112 } 1113 return err; 1114 } 1115 1116 wl_entry_destroy(ubi, e); 1117 if (err != -EIO) 1118 /* 1119 * If this is not %-EIO, we have no idea what to do. Scheduling 1120 * this physical eraseblock for erasure again would cause 1121 * errors again and again. Well, lets switch to R/O mode. 1122 */ 1123 goto out_ro; 1124 1125 /* It is %-EIO, the PEB went bad */ 1126 1127 if (!ubi->bad_allowed) { 1128 ubi_err(ubi, "bad physical eraseblock %d detected", pnum); 1129 goto out_ro; 1130 } 1131 1132 spin_lock(&ubi->volumes_lock); 1133 if (ubi->beb_rsvd_pebs == 0) { 1134 if (ubi->avail_pebs == 0) { 1135 spin_unlock(&ubi->volumes_lock); 1136 ubi_err(ubi, "no reserved/available physical eraseblocks"); 1137 goto out_ro; 1138 } 1139 ubi->avail_pebs -= 1; 1140 available_consumed = 1; 1141 } 1142 spin_unlock(&ubi->volumes_lock); 1143 1144 ubi_msg(ubi, "mark PEB %d as bad", pnum); 1145 err = ubi_io_mark_bad(ubi, pnum); 1146 if (err) 1147 goto out_ro; 1148 1149 spin_lock(&ubi->volumes_lock); 1150 if (ubi->beb_rsvd_pebs > 0) { 1151 if (available_consumed) { 1152 /* 1153 * The amount of reserved PEBs increased since we last 1154 * checked. 
1155 */ 1156 ubi->avail_pebs += 1; 1157 available_consumed = 0; 1158 } 1159 ubi->beb_rsvd_pebs -= 1; 1160 } 1161 ubi->bad_peb_count += 1; 1162 ubi->good_peb_count -= 1; 1163 ubi_calculate_reserved(ubi); 1164 if (available_consumed) 1165 ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB"); 1166 else if (ubi->beb_rsvd_pebs) 1167 ubi_msg(ubi, "%d PEBs left in the reserve", 1168 ubi->beb_rsvd_pebs); 1169 else 1170 ubi_warn(ubi, "last PEB from the reserve was used"); 1171 spin_unlock(&ubi->volumes_lock); 1172 1173 return err; 1174 1175 out_ro: 1176 if (available_consumed) { 1177 spin_lock(&ubi->volumes_lock); 1178 ubi->avail_pebs += 1; 1179 spin_unlock(&ubi->volumes_lock); 1180 } 1181 ubi_ro_mode(ubi); 1182 return err; 1183 } 1184 1185 /** 1186 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. 1187 * @ubi: UBI device description object 1188 * @vol_id: the volume ID that last used this PEB 1189 * @lnum: the last used logical eraseblock number for the PEB 1190 * @pnum: physical eraseblock to return 1191 * @torture: if this physical eraseblock has to be tortured 1192 * 1193 * This function is called to return physical eraseblock @pnum to the pool of 1194 * free physical eraseblocks. The @torture flag has to be set if an I/O error 1195 * occurred to this @pnum and it has to be tested. This function returns zero 1196 * in case of success, and a negative error code in case of failure. 1197 */ 1198 int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, 1199 int pnum, int torture) 1200 { 1201 int err; 1202 struct ubi_wl_entry *e; 1203 1204 dbg_wl("PEB %d", pnum); 1205 ubi_assert(pnum >= 0); 1206 ubi_assert(pnum < ubi->peb_count); 1207 1208 down_read(&ubi->fm_protect); 1209 1210 retry: 1211 spin_lock(&ubi->wl_lock); 1212 e = ubi->lookuptbl[pnum]; 1213 if (e == ubi->move_from) { 1214 /* 1215 * User is putting the physical eraseblock which was selected to 1216 * be moved. It will be scheduled for erasure in the 1217 * wear-leveling worker. 
1218 */ 1219 dbg_wl("PEB %d is being moved, wait", pnum); 1220 spin_unlock(&ubi->wl_lock); 1221 1222 /* Wait for the WL worker by taking the @ubi->move_mutex */ 1223 mutex_lock(&ubi->move_mutex); 1224 mutex_unlock(&ubi->move_mutex); 1225 goto retry; 1226 } else if (e == ubi->move_to) { 1227 /* 1228 * User is putting the physical eraseblock which was selected 1229 * as the target the data is moved to. It may happen if the EBA 1230 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' 1231 * but the WL sub-system has not put the PEB to the "used" tree 1232 * yet, but it is about to do this. So we just set a flag which 1233 * will tell the WL worker that the PEB is not needed anymore 1234 * and should be scheduled for erasure. 1235 */ 1236 dbg_wl("PEB %d is the target of data moving", pnum); 1237 ubi_assert(!ubi->move_to_put); 1238 ubi->move_to_put = 1; 1239 spin_unlock(&ubi->wl_lock); 1240 up_read(&ubi->fm_protect); 1241 return 0; 1242 } else { 1243 if (in_wl_tree(e, &ubi->used)) { 1244 self_check_in_wl_tree(ubi, e, &ubi->used); 1245 rb_erase(&e->u.rb, &ubi->used); 1246 } else if (in_wl_tree(e, &ubi->scrub)) { 1247 self_check_in_wl_tree(ubi, e, &ubi->scrub); 1248 rb_erase(&e->u.rb, &ubi->scrub); 1249 } else if (in_wl_tree(e, &ubi->erroneous)) { 1250 self_check_in_wl_tree(ubi, e, &ubi->erroneous); 1251 rb_erase(&e->u.rb, &ubi->erroneous); 1252 ubi->erroneous_peb_count -= 1; 1253 ubi_assert(ubi->erroneous_peb_count >= 0); 1254 /* Erroneous PEBs should be tortured */ 1255 torture = 1; 1256 } else { 1257 err = prot_queue_del(ubi, e->pnum); 1258 if (err) { 1259 ubi_err(ubi, "PEB %d not found", pnum); 1260 ubi_ro_mode(ubi); 1261 spin_unlock(&ubi->wl_lock); 1262 up_read(&ubi->fm_protect); 1263 return err; 1264 } 1265 } 1266 } 1267 spin_unlock(&ubi->wl_lock); 1268 1269 err = schedule_erase(ubi, e, vol_id, lnum, torture); 1270 if (err) { 1271 spin_lock(&ubi->wl_lock); 1272 wl_tree_add(e, &ubi->used); 1273 spin_unlock(&ubi->wl_lock); 1274 } 1275 1276 
up_read(&ubi->fm_protect); 1277 return err; 1278 } 1279 1280 /** 1281 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. 1282 * @ubi: UBI device description object 1283 * @pnum: the physical eraseblock to schedule 1284 * 1285 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock 1286 * needs scrubbing. This function schedules a physical eraseblock for 1287 * scrubbing which is done in background. This function returns zero in case of 1288 * success and a negative error code in case of failure. 1289 */ 1290 int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) 1291 { 1292 struct ubi_wl_entry *e; 1293 1294 ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum); 1295 1296 retry: 1297 spin_lock(&ubi->wl_lock); 1298 e = ubi->lookuptbl[pnum]; 1299 if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || 1300 in_wl_tree(e, &ubi->erroneous)) { 1301 spin_unlock(&ubi->wl_lock); 1302 return 0; 1303 } 1304 1305 if (e == ubi->move_to) { 1306 /* 1307 * This physical eraseblock was used to move data to. The data 1308 * was moved but the PEB was not yet inserted to the proper 1309 * tree. We should just wait a little and let the WL worker 1310 * proceed. 1311 */ 1312 spin_unlock(&ubi->wl_lock); 1313 dbg_wl("the PEB %d is not in proper tree, retry", pnum); 1314 yield(); 1315 goto retry; 1316 } 1317 1318 if (in_wl_tree(e, &ubi->used)) { 1319 self_check_in_wl_tree(ubi, e, &ubi->used); 1320 rb_erase(&e->u.rb, &ubi->used); 1321 } else { 1322 int err; 1323 1324 err = prot_queue_del(ubi, e->pnum); 1325 if (err) { 1326 ubi_err(ubi, "PEB %d not found", pnum); 1327 ubi_ro_mode(ubi); 1328 spin_unlock(&ubi->wl_lock); 1329 return err; 1330 } 1331 } 1332 1333 wl_tree_add(e, &ubi->scrub); 1334 spin_unlock(&ubi->wl_lock); 1335 1336 /* 1337 * Technically scrubbing is the same as wear-leveling, so it is done 1338 * by the WL worker. 1339 */ 1340 return ensure_wear_leveling(ubi, 0); 1341 } 1342 1343 /** 1344 * ubi_wl_flush - flush all pending works. 
1345 * @ubi: UBI device description object 1346 * @vol_id: the volume id to flush for 1347 * @lnum: the logical eraseblock number to flush for 1348 * 1349 * This function executes all pending works for a particular volume id / 1350 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it 1351 * acts as a wildcard for all of the corresponding volume numbers or logical 1352 * eraseblock numbers. It returns zero in case of success and a negative error 1353 * code in case of failure. 1354 */ 1355 int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) 1356 { 1357 int err = 0; 1358 int found = 1; 1359 1360 /* 1361 * Erase while the pending works queue is not empty, but not more than 1362 * the number of currently pending works. 1363 */ 1364 dbg_wl("flush pending work for LEB %d:%d (%d pending works)", 1365 vol_id, lnum, ubi->works_count); 1366 1367 while (found) { 1368 struct ubi_work *wrk, *tmp; 1369 found = 0; 1370 1371 down_read(&ubi->work_sem); 1372 spin_lock(&ubi->wl_lock); 1373 list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { 1374 if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && 1375 (lnum == UBI_ALL || wrk->lnum == lnum)) { 1376 list_del(&wrk->list); 1377 ubi->works_count -= 1; 1378 ubi_assert(ubi->works_count >= 0); 1379 spin_unlock(&ubi->wl_lock); 1380 1381 err = wrk->func(ubi, wrk, 0); 1382 if (err) { 1383 up_read(&ubi->work_sem); 1384 return err; 1385 } 1386 1387 spin_lock(&ubi->wl_lock); 1388 found = 1; 1389 break; 1390 } 1391 } 1392 spin_unlock(&ubi->wl_lock); 1393 up_read(&ubi->work_sem); 1394 } 1395 1396 /* 1397 * Make sure all the works which have been done in parallel are 1398 * finished. 1399 */ 1400 down_write(&ubi->work_sem); 1401 up_write(&ubi->work_sem); 1402 1403 return err; 1404 } 1405 1406 /** 1407 * tree_destroy - destroy an RB-tree. 
1408 * @ubi: UBI device description object 1409 * @root: the root of the tree to destroy 1410 */ 1411 static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) 1412 { 1413 struct rb_node *rb; 1414 struct ubi_wl_entry *e; 1415 1416 rb = root->rb_node; 1417 while (rb) { 1418 if (rb->rb_left) 1419 rb = rb->rb_left; 1420 else if (rb->rb_right) 1421 rb = rb->rb_right; 1422 else { 1423 e = rb_entry(rb, struct ubi_wl_entry, u.rb); 1424 1425 rb = rb_parent(rb); 1426 if (rb) { 1427 if (rb->rb_left == &e->u.rb) 1428 rb->rb_left = NULL; 1429 else 1430 rb->rb_right = NULL; 1431 } 1432 1433 wl_entry_destroy(ubi, e); 1434 } 1435 } 1436 } 1437 1438 /** 1439 * ubi_thread - UBI background thread. 1440 * @u: the UBI device description object pointer 1441 */ 1442 int ubi_thread(void *u) 1443 { 1444 int failures = 0; 1445 struct ubi_device *ubi = u; 1446 1447 ubi_msg(ubi, "background thread \"%s\" started, PID %d", 1448 ubi->bgt_name, task_pid_nr(current)); 1449 1450 set_freezable(); 1451 for (;;) { 1452 int err; 1453 1454 if (kthread_should_stop()) 1455 break; 1456 1457 if (try_to_freeze()) 1458 continue; 1459 1460 spin_lock(&ubi->wl_lock); 1461 if (list_empty(&ubi->works) || ubi->ro_mode || 1462 !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { 1463 set_current_state(TASK_INTERRUPTIBLE); 1464 spin_unlock(&ubi->wl_lock); 1465 schedule(); 1466 continue; 1467 } 1468 spin_unlock(&ubi->wl_lock); 1469 1470 err = do_work(ubi); 1471 if (err) { 1472 ubi_err(ubi, "%s: work failed with error code %d", 1473 ubi->bgt_name, err); 1474 if (failures++ > WL_MAX_FAILURES) { 1475 /* 1476 * Too many failures, disable the thread and 1477 * switch to read-only mode. 
1478 */ 1479 ubi_msg(ubi, "%s: %d consecutive failures", 1480 ubi->bgt_name, WL_MAX_FAILURES); 1481 ubi_ro_mode(ubi); 1482 ubi->thread_enabled = 0; 1483 continue; 1484 } 1485 } else 1486 failures = 0; 1487 1488 cond_resched(); 1489 } 1490 1491 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1492 return 0; 1493 } 1494 1495 /** 1496 * shutdown_work - shutdown all pending works. 1497 * @ubi: UBI device description object 1498 */ 1499 static void shutdown_work(struct ubi_device *ubi) 1500 { 1501 #ifdef CONFIG_MTD_UBI_FASTMAP 1502 #ifndef __UBOOT__ 1503 flush_work(&ubi->fm_work); 1504 #else 1505 /* in U-Boot, we have all work done */ 1506 #endif 1507 #endif 1508 while (!list_empty(&ubi->works)) { 1509 struct ubi_work *wrk; 1510 1511 wrk = list_entry(ubi->works.next, struct ubi_work, list); 1512 list_del(&wrk->list); 1513 wrk->func(ubi, wrk, 1); 1514 ubi->works_count -= 1; 1515 ubi_assert(ubi->works_count >= 0); 1516 } 1517 } 1518 1519 /** 1520 * ubi_wl_init - initialize the WL sub-system using attaching information. 1521 * @ubi: UBI device description object 1522 * @ai: attaching information 1523 * 1524 * This function returns zero in case of success, and a negative error code in 1525 * case of failure. 
1526 */ 1527 int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) 1528 { 1529 int err, i, reserved_pebs, found_pebs = 0; 1530 struct rb_node *rb1, *rb2; 1531 struct ubi_ainf_volume *av; 1532 struct ubi_ainf_peb *aeb, *tmp; 1533 struct ubi_wl_entry *e; 1534 1535 ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; 1536 spin_lock_init(&ubi->wl_lock); 1537 mutex_init(&ubi->move_mutex); 1538 init_rwsem(&ubi->work_sem); 1539 ubi->max_ec = ai->max_ec; 1540 INIT_LIST_HEAD(&ubi->works); 1541 1542 sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); 1543 1544 err = -ENOMEM; 1545 ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); 1546 if (!ubi->lookuptbl) 1547 return err; 1548 1549 for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) 1550 INIT_LIST_HEAD(&ubi->pq[i]); 1551 ubi->pq_head = 0; 1552 1553 ubi->free_count = 0; 1554 list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { 1555 cond_resched(); 1556 1557 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1558 if (!e) 1559 goto out_free; 1560 1561 e->pnum = aeb->pnum; 1562 e->ec = aeb->ec; 1563 ubi->lookuptbl[e->pnum] = e; 1564 if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { 1565 wl_entry_destroy(ubi, e); 1566 goto out_free; 1567 } 1568 1569 found_pebs++; 1570 } 1571 1572 list_for_each_entry(aeb, &ai->free, u.list) { 1573 cond_resched(); 1574 1575 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1576 if (!e) 1577 goto out_free; 1578 1579 e->pnum = aeb->pnum; 1580 e->ec = aeb->ec; 1581 ubi_assert(e->ec >= 0); 1582 1583 wl_tree_add(e, &ubi->free); 1584 ubi->free_count++; 1585 1586 ubi->lookuptbl[e->pnum] = e; 1587 1588 found_pebs++; 1589 } 1590 1591 ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { 1592 ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { 1593 cond_resched(); 1594 1595 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1596 if (!e) 1597 goto out_free; 1598 1599 e->pnum = aeb->pnum; 1600 e->ec = aeb->ec; 1601 ubi->lookuptbl[e->pnum] = e; 1602 1603 if 
(!aeb->scrub) { 1604 dbg_wl("add PEB %d EC %d to the used tree", 1605 e->pnum, e->ec); 1606 wl_tree_add(e, &ubi->used); 1607 } else { 1608 dbg_wl("add PEB %d EC %d to the scrub tree", 1609 e->pnum, e->ec); 1610 wl_tree_add(e, &ubi->scrub); 1611 } 1612 1613 found_pebs++; 1614 } 1615 } 1616 1617 dbg_wl("found %i PEBs", found_pebs); 1618 1619 if (ubi->fm) { 1620 ubi_assert(ubi->good_peb_count == 1621 found_pebs + ubi->fm->used_blocks); 1622 1623 for (i = 0; i < ubi->fm->used_blocks; i++) { 1624 e = ubi->fm->e[i]; 1625 ubi->lookuptbl[e->pnum] = e; 1626 } 1627 } 1628 else 1629 ubi_assert(ubi->good_peb_count == found_pebs); 1630 1631 reserved_pebs = WL_RESERVED_PEBS; 1632 ubi_fastmap_init(ubi, &reserved_pebs); 1633 1634 if (ubi->avail_pebs < reserved_pebs) { 1635 ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", 1636 ubi->avail_pebs, reserved_pebs); 1637 if (ubi->corr_peb_count) 1638 ubi_err(ubi, "%d PEBs are corrupted and not used", 1639 ubi->corr_peb_count); 1640 goto out_free; 1641 } 1642 ubi->avail_pebs -= reserved_pebs; 1643 ubi->rsvd_pebs += reserved_pebs; 1644 1645 /* Schedule wear-leveling if needed */ 1646 err = ensure_wear_leveling(ubi, 0); 1647 if (err) 1648 goto out_free; 1649 1650 return 0; 1651 1652 out_free: 1653 shutdown_work(ubi); 1654 tree_destroy(ubi, &ubi->used); 1655 tree_destroy(ubi, &ubi->free); 1656 tree_destroy(ubi, &ubi->scrub); 1657 kfree(ubi->lookuptbl); 1658 return err; 1659 } 1660 1661 /** 1662 * protection_queue_destroy - destroy the protection queue. 1663 * @ubi: UBI device description object 1664 */ 1665 static void protection_queue_destroy(struct ubi_device *ubi) 1666 { 1667 int i; 1668 struct ubi_wl_entry *e, *tmp; 1669 1670 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { 1671 list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { 1672 list_del(&e->u.list); 1673 wl_entry_destroy(ubi, e); 1674 } 1675 } 1676 } 1677 1678 /** 1679 * ubi_wl_close - close the wear-leveling sub-system. 
1680 * @ubi: UBI device description object 1681 */ 1682 void ubi_wl_close(struct ubi_device *ubi) 1683 { 1684 dbg_wl("close the WL sub-system"); 1685 ubi_fastmap_close(ubi); 1686 shutdown_work(ubi); 1687 protection_queue_destroy(ubi); 1688 tree_destroy(ubi, &ubi->used); 1689 tree_destroy(ubi, &ubi->erroneous); 1690 tree_destroy(ubi, &ubi->free); 1691 tree_destroy(ubi, &ubi->scrub); 1692 kfree(ubi->lookuptbl); 1693 } 1694 1695 /** 1696 * self_check_ec - make sure that the erase counter of a PEB is correct. 1697 * @ubi: UBI device description object 1698 * @pnum: the physical eraseblock number to check 1699 * @ec: the erase counter to check 1700 * 1701 * This function returns zero if the erase counter of physical eraseblock @pnum 1702 * is equivalent to @ec, and a negative error code if not or if an error 1703 * occurred. 1704 */ 1705 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) 1706 { 1707 int err; 1708 long long read_ec; 1709 struct ubi_ec_hdr *ec_hdr; 1710 1711 if (!ubi_dbg_chk_gen(ubi)) 1712 return 0; 1713 1714 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 1715 if (!ec_hdr) 1716 return -ENOMEM; 1717 1718 err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); 1719 if (err && err != UBI_IO_BITFLIPS) { 1720 /* The header does not have to exist */ 1721 err = 0; 1722 goto out_free; 1723 } 1724 1725 read_ec = be64_to_cpu(ec_hdr->ec); 1726 if (ec != read_ec && read_ec - ec > 1) { 1727 ubi_err(ubi, "self-check failed for PEB %d", pnum); 1728 ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec); 1729 dump_stack(); 1730 err = 1; 1731 } else 1732 err = 0; 1733 1734 out_free: 1735 kfree(ec_hdr); 1736 return err; 1737 } 1738 1739 /** 1740 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. 1741 * @ubi: UBI device description object 1742 * @e: the wear-leveling entry to check 1743 * @root: the root of the tree 1744 * 1745 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it 1746 * is not. 
1747 */ 1748 static int self_check_in_wl_tree(const struct ubi_device *ubi, 1749 struct ubi_wl_entry *e, struct rb_root *root) 1750 { 1751 if (!ubi_dbg_chk_gen(ubi)) 1752 return 0; 1753 1754 if (in_wl_tree(e, root)) 1755 return 0; 1756 1757 ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ", 1758 e->pnum, e->ec, root); 1759 dump_stack(); 1760 return -EINVAL; 1761 } 1762 1763 /** 1764 * self_check_in_pq - check if wear-leveling entry is in the protection 1765 * queue. 1766 * @ubi: UBI device description object 1767 * @e: the wear-leveling entry to check 1768 * 1769 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 1770 */ 1771 static int self_check_in_pq(const struct ubi_device *ubi, 1772 struct ubi_wl_entry *e) 1773 { 1774 struct ubi_wl_entry *p; 1775 int i; 1776 1777 if (!ubi_dbg_chk_gen(ubi)) 1778 return 0; 1779 1780 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) 1781 list_for_each_entry(p, &ubi->pq[i], u.list) 1782 if (p == e) 1783 return 0; 1784 1785 ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue", 1786 e->pnum, e->ec); 1787 dump_stack(); 1788 return -EINVAL; 1789 } 1790 #ifndef CONFIG_MTD_UBI_FASTMAP 1791 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) 1792 { 1793 struct ubi_wl_entry *e; 1794 1795 e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 1796 self_check_in_wl_tree(ubi, e, &ubi->free); 1797 ubi->free_count--; 1798 ubi_assert(ubi->free_count >= 0); 1799 rb_erase(&e->u.rb, &ubi->free); 1800 1801 return e; 1802 } 1803 1804 /** 1805 * produce_free_peb - produce a free physical eraseblock. 1806 * @ubi: UBI device description object 1807 * 1808 * This function tries to make a free PEB by means of synchronous execution of 1809 * pending works. This may be needed if, for example the background thread is 1810 * disabled. Returns zero in case of success and a negative error code in case 1811 * of failure. 
1812 */ 1813 static int produce_free_peb(struct ubi_device *ubi) 1814 { 1815 int err; 1816 1817 while (!ubi->free.rb_node && ubi->works_count) { 1818 spin_unlock(&ubi->wl_lock); 1819 1820 dbg_wl("do one work synchronously"); 1821 err = do_work(ubi); 1822 1823 spin_lock(&ubi->wl_lock); 1824 if (err) 1825 return err; 1826 } 1827 1828 return 0; 1829 } 1830 1831 /** 1832 * ubi_wl_get_peb - get a physical eraseblock. 1833 * @ubi: UBI device description object 1834 * 1835 * This function returns a physical eraseblock in case of success and a 1836 * negative error code in case of failure. 1837 * Returns with ubi->fm_eba_sem held in read mode! 1838 */ 1839 int ubi_wl_get_peb(struct ubi_device *ubi) 1840 { 1841 int err; 1842 struct ubi_wl_entry *e; 1843 1844 retry: 1845 down_read(&ubi->fm_eba_sem); 1846 spin_lock(&ubi->wl_lock); 1847 if (!ubi->free.rb_node) { 1848 if (ubi->works_count == 0) { 1849 ubi_err(ubi, "no free eraseblocks"); 1850 ubi_assert(list_empty(&ubi->works)); 1851 spin_unlock(&ubi->wl_lock); 1852 return -ENOSPC; 1853 } 1854 1855 err = produce_free_peb(ubi); 1856 if (err < 0) { 1857 spin_unlock(&ubi->wl_lock); 1858 return err; 1859 } 1860 spin_unlock(&ubi->wl_lock); 1861 up_read(&ubi->fm_eba_sem); 1862 goto retry; 1863 1864 } 1865 e = wl_get_wle(ubi); 1866 prot_queue_add(ubi, e); 1867 spin_unlock(&ubi->wl_lock); 1868 1869 err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, 1870 ubi->peb_size - ubi->vid_hdr_aloffset); 1871 if (err) { 1872 ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum); 1873 return err; 1874 } 1875 1876 return e->pnum; 1877 } 1878 #else 1879 #include "fastmap-wl.c" 1880 #endif 1881